pax_global_header 0000666 0000000 0000000 00000000064 13464300463 0014515 g ustar 00root root 0000000 0000000 52 comment=817c148592cee1a1e6abce60a2e03ea36df0db0f
flexbar-3.5.0/ 0000775 0000000 0000000 00000000000 13464300463 0013145 5 ustar 00root root 0000000 0000000 flexbar-3.5.0/.gitignore 0000664 0000000 0000000 00000000160 13464300463 0015132 0 ustar 00root root 0000000 0000000 # cmake
Makefile
CMakeFiles
CMakeCache.txt
cmake_install.cmake
# misc
flexbar
.DS_Store
wget-log
include
local
flexbar-3.5.0/CMakeLists.txt 0000664 0000000 0000000 00000000724 13464300463 0015710 0 ustar 00root root 0000000 0000000 cmake_minimum_required( VERSION 2.8.2 )
project( FLEXBAR )

set( SEQAN_APP_VERSION "3.5.0" )

# Default to an optimized build when the user did not choose a build type.
# This is done before add_subdirectory() so the default is already visible
# while src/ is configured, and it is skipped for multi-config generators
# (Visual Studio, Xcode), where CMAKE_BUILD_TYPE is not used.
# FORCE only replaces the empty cache entry; a user-provided value is kept.
if( NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE )
  set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build: None Debug Release RelWithDebInfo MinSizeRel." FORCE )
endif()

# The SeqAn headers are expected in <source dir>/include.
include_directories( "${FLEXBAR_SOURCE_DIR}/include" )

# Place the flexbar executable in the top-level build directory.
set( EXECUTABLE_OUTPUT_PATH "${FLEXBAR_BINARY_DIR}" )

add_subdirectory( src )
flexbar-3.5.0/LICENSE.md 0000664 0000000 0000000 00000002742 13464300463 0014556 0 ustar 00root root 0000000 0000000 BSD 3-Clause License
Copyright (c) 2018, SeqAn
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flexbar-3.5.0/README.md 0000664 0000000 0000000 00000014715 13464300463 0014434 0 ustar 00root root 0000000 0000000 ## Flexbar – flexible barcode and adapter removal
The program Flexbar preprocesses high-throughput sequencing data efficiently. It demultiplexes barcoded runs and removes adapter sequences. Several adapter removal presets for Illumina libraries are included. Flexbar computes exact overlap alignments using SIMD and multicore parallelism. Moreover, trimming and filtering features are provided, e.g. trimming of homopolymers at read ends. Flexbar increases read mapping rates and improves genome as well as transcriptome assemblies. Unique molecular identifiers can be extracted in a flexible way. The software supports data in fasta and fastq format from multiple sequencing platforms.
Refer to the [manual](https://github.com/seqan/flexbar/wiki) or contact [Johannes Roehr](https://github.com/jtroehr) for support with this application.
### References
Johannes T. Roehr, Christoph Dieterich, Knut Reinert:
Flexbar 3.0 – SIMD and multicore parallelization. Bioinformatics 2017.
See article on [PubMed](https://www.ncbi.nlm.nih.gov/pubmed/28541403)
Matthias Dodt, Johannes T. Roehr, Rina Ahmed, Christoph Dieterich:
Flexbar – flexible barcode and adapter processing for next-generation sequencing platforms. Biology 2012.
See article on [PubMed](https://www.ncbi.nlm.nih.gov/pubmed/24832523)

### Download
Flexbar source code as well as binaries for Linux and Mac OS can be downloaded on the [release](https://github.com/seqan/flexbar/releases) page. Please follow instructions for building or setup of binaries below. Additionally, Flexbar is available via package manager on Debian systems, in Homebrew, and in Bioconda. Versions before 2.4 can be found on the [old](https://sourceforge.net/projects/flexbar) page.
Installation with package managers:
* Debian: `sudo apt install flexbar`
* Homebrew: `brew install brewsci/science/flexbar`
* Bioconda: `conda install -c bioconda flexbar`
To get the latest version and best performance, consider building Flexbar from source.
### Building from source
Make sure that `cmake` is available, as well as development and runtime files of the TBB library 4.0 or later (Intel Threading Building Blocks). For example on Debian systems, install the packages `libtbb-dev` and `libtbb2`. Furthermore, the SeqAn library and a compiler that supports C++14 are required:
* Get SeqAn library version 2.4.0 [here](https://github.com/seqan/seqan/releases/download/seqan-v2.4.0/seqan-library-2.4.0.tar.xz)
* Download Flexbar 3.5.0 source code [release](https://github.com/seqan/flexbar/releases)
Decompress both files:
tar xzf flexbar-3.5.0.tar.gz
tar xJf seqan-library-2.4.0.tar.xz
Move SeqAn include folder to Flexbar:
mv seqan-library-2.4.0/include flexbar-3.5.0
Use these commands for building:
cd flexbar-3.5.0
cmake .
make
Flexbar versions from 3.0 up to 3.2 require SeqAn 2.2.0 instead. Flexbar version 2.7 uses SeqAn 2.1.1 and releases prior to 2.7 use the SeqAn 1.4.2 library.
### Binaries
For execution of provided Flexbar binaries, the corresponding TBB library has to be available. Downloads contain the library file for runtime. Follow the platform specific instructions below.
#### Linux
Adjust lib search path to include the absolute path of the Flexbar directory containing the lib file libtbb.so.2 for the current terminal session, or permanently in shell startup scripts:
export LD_LIBRARY_PATH=/YourPath/flexbar-3.5.0-linux:$LD_LIBRARY_PATH
#### Mac OS
The same applies as for Linux. Make the file libtbb.dylib available by setting the lib search path:
export DYLD_LIBRARY_PATH=/YourPath/flexbar-3.5.0-macos:$DYLD_LIBRARY_PATH
### Program usage
Flexbar needs at least one file with sequencing reads in fasta or fastq format as input. Additionally, the target name and further options can be specified. For read separation based on barcodes and for adapter removal, a file in fasta format with barcode or adapter sequences should be provided.
flexbar -r reads [-b barcodes] [-a adapters] [options]
Refer to the help screen `flexbar -h` or [manual](https://github.com/seqan/flexbar/wiki) for more information. Although default parameters of Flexbar are optimized to deliver good results in many scenarios, the adjustment of parameters like `--adapter-min-overlap` might improve results. For tests, run `flexbar_test.sh` within the test folder if `flexbar` is reachable via the path variable.
#### Quality-based trimming
In this example, reads in fastq format are trimmed based on their quality scores in Illumina version 1.8 format. The TAIL method trims the right end of reads until a quality score equal to or higher than the threshold is reached, default 20. Trimmed reads are written to `target.fastq` in the same format as the input.
flexbar -r reads.fq -t target -q TAIL -qf i1.8
#### Demultiplexing with barcodes
Reads that are barcoded on the left end are demultiplexed by specifying a file with barcodes in fasta format. Reads that can be assigned are written to separate files using file names that are based on the names of barcodes in the fasta file.
flexbar -r reads.fq -b barcodes.fa -bt LTAIL
#### Adapter removal single-end
To remove adapter sequences from single-end reads, specify a file with adapters in fasta format. These are removed from the right side of reads per default, if they do not align before the read start. The left side of reads is kept if long enough. The overlap of an adapter and read must have at least length 3 with at most 10% errors in default settings.
flexbar -r reads.fq -a adapters.fa -ao 3 -ae 0.1
#### Adapter removal paired-end
For paired-end libraries, specify both files with paired reads and a fasta file with adapters for removal. Given adapters are trimmed in right mode per default. It is recommended to activate the pair overlap detection in case of standard paired reads. This increases the sensitivity by removing very short parts of adapters if an overlap is detected for a pair.
flexbar -r r1.fq -p r2.fq -a a1.fa -a2 a2.fa -ap ON
#### Adapter removal presets
Several adapter presets for Illumina libraries are included in Flexbar. For example, select the `TruSeq` preset for standard TruSeq adapters and specify two read files for paired reads. If a preset is chosen, a separate file with adapters is not needed for removal. It is recommended to turn on the pair overlap detection for standard paired-end libraries.
flexbar -r r1.fq -p r2.fq -aa TruSeq -ap ON
For further examples visit the [manual](https://github.com/seqan/flexbar/wiki) page.
flexbar-3.5.0/galaxy/ 0000775 0000000 0000000 00000000000 13464300463 0014432 5 ustar 00root root 0000000 0000000 flexbar-3.5.0/galaxy/flexbar.pl 0000775 0000000 0000000 00000003240 13464300463 0016414 0 ustar 00root root 0000000 0000000 #!/usr/bin/env perl
# Flexbar wrapper for Galaxy tool definition, version 3.5.0
# Author: Johannes Roehr
use warnings;
use strict;
# Suffix captured from the read file arguments (the text matched after the
# last dot, i.e. "fastq" followed by further word characters and optionally
# ".gz" or ".bz2"). All read files must share the same suffix.
my $format;
# Input files that were renamed to a plain fastq name; removed after the run.
my @inFiles;
# Output file arguments that were rewritten; renamed back after the run.
my @outFiles;
# "GZ" or "BZ2" if a compressed read file was seen. Only referenced by the
# commented-out --zip-output line further down.
my $compression = "";
# Pass over all arguments and rewrite every one that ends in a fastq-like
# suffix so that it ends in .fastq, .fastq.gz or .fastq.bz2 instead.
foreach(0..$#ARGV){
my $arg = $ARGV[$_];
if($arg =~ /\.(fastq\w+)$/ || $arg =~ /\.(fastq\w+\.gz)$/ || $arg =~ /\.(fastq\w+\.bz2)$/){
# Abort if two read files carry different suffixes.
if(defined $format && $format ne $1){
warn "Read files should have the same format.\n";
exit 1;
}
$format = $1;
# Keep the original name so an input file can be renamed below.
my $file = $arg;
$arg =~ s/\.fastq\w+$/\.fastq/;
$arg =~ s/\.fastq\w+\.gz$/\.fastq\.gz/;
$arg =~ s/\.fastq\w+\.bz2$/\.fastq\.bz2/;
$compression = "GZ" if $arg =~ /\.fastq\.gz$/;
$compression = "BZ2" if $arg =~ /\.fastq\.bz2$/;
# The rewritten name replaces the argument that is passed on to the call.
$ARGV[$_] = $arg;
# Arguments matching .dat_input<char>.fastq[.gz|.bz2] are treated as input
# files: the file on disk is renamed to the rewritten name.
if($arg =~ /\.dat_input\w\.fastq$/ || $arg =~ /\.dat_input\w\.fastq\.gz$/ || $arg =~ /\.dat_input\w\.fastq\.bz2$/){
push @inFiles, $arg;
rename $file, $arg;
}
# Arguments matching .dat.fastq[.gz|.bz2] are remembered as output files.
push @outFiles, $arg if $arg =~ /\.dat\.fastq$/ || $arg =~ /\.dat\.fastq\.gz$/ || $arg =~ /\.dat\.fastq\.bz2$/;
}
}
# A last argument ending in "barcoded" switches to barcoded mode; that
# argument is left out of the command line and used as a directory below.
my $barcoded = 0;
$barcoded = 1 if $ARGV[$#ARGV] =~ /barcoded$/;
# The remaining arguments, joined by spaces, form the command to execute.
my $call = join " ", @ARGV[0..($#ARGV - $barcoded)];
# $call = $call ." --zip-output ". $compression if $barcoded && $compression ne "";
# system returns non-zero on failure, in which case the wrapper exits with 1.
system $call and exit 1;
# Remove the renamed input files.
unlink $_ or warn "Could not unlink $_: $!" foreach(@inFiles);
if($barcoded){
# Strip the compression part so that only the fastq-like suffix remains.
$format =~ s/\.gz//;
$format =~ s/\.bz2//;
# Rename each flexbarOut*.fastq* file in the directory given by the last
# argument: a trailing "fastq" is replaced by the captured suffix.
foreach(<$ARGV[$#ARGV]/flexbarOut*.fastq*>){
my $file = $_;
s/fastq$/$format/;
# s/fastq\.gz$/$format/;
# s/fastq\.bz2$/$format/;
rename $file, $_;
}
}
else{
# Not barcoded: rename each output file back by dropping the .fastq,
# .fastq.gz or .fastq.bz2 ending that was added above.
foreach(@outFiles){
my $file = $_;
s/\.fastq$//;
s/\.fastq\.gz$//;
s/\.fastq\.bz2$//;
rename $file, $_;
}
}
flexbar-3.5.0/galaxy/flexbar.xml 0000664 0000000 0000000 00000063540 13464300463 0016607 0 ustar 00root root 0000000 0000000
- flexible barcode and adapter removal
flexbar
flexbar --versions
reads['mode'] == 'se'
cBarcodes['select'] == "off"
reads['mode'] == "pe"
cBarcodes['select'] == "off"
reads['mode'] == "pe"
cBarcodes['select'] == "off"
reads['mode'] == "pc"
cBarcodes['select'] == "off"
cBarcodes['select'] == "on"
**Description**
The program Flexbar preprocesses high-throughput sequencing data efficiently. It demultiplexes barcoded runs and removes adapter sequences. Moreover, trimming and filtering features are provided. Flexbar increases read mapping rates and improves genome as well as transcriptome assemblies. Unique molecular identifiers can be extracted in a flexible way. The program supports sequencing data in fastq format, e.g. from the Illumina platform. Flexbar is available on the project_ page.
.. _project: https://github.com/seqan/flexbar
------
**Trim-end modes**
**Any:** longer side of read remains after overlap removal
**Left:** right side remains after removal, align before or at read end
**Right:** left part remains after removal, align after or at read start
**Left tail:** consider first n bases of reads in alignment
**Right tail:** use only last n bases, see tail-length options
------
**Documentation**
Further documentation is available on the `manual`__ page and via the command line help screen.
.. __: https://github.com/seqan/flexbar/wiki
------
**References**
Johannes T. Roehr, Christoph Dieterich, Knut Reinert:
Flexbar 3.0 – SIMD and multicore parallelization. Bioinformatics 2017.
Matthias Dodt, Johannes T. Roehr, Rina Ahmed, Christoph Dieterich:
Flexbar – flexible barcode and adapter processing for next-generation sequencing platforms. Biology 2012.
flexbar-3.5.0/src/ 0000775 0000000 0000000 00000000000 13464300463 0013734 5 ustar 00root root 0000000 0000000 flexbar-3.5.0/src/CMakeLists.txt 0000664 0000000 0000000 00000003363 13464300463 0016501 0 ustar 00root root 0000000 0000000 cmake_minimum_required( VERSION 2.8.2 )
include( CheckCXXCompilerFlag )

# Flexbar needs C++14. Request it when the compiler accepts the GNU-style
# flag; otherwise warn visibly instead of hiding the notice among STATUS lines.
check_cxx_compiler_flag( "-std=c++14" COMPILER_SUPPORTS_CXX14 )
if( COMPILER_SUPPORTS_CXX14 )
  set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14" )
else()
  message( WARNING "The compiler ${CMAKE_CXX_COMPILER} has no C++14 support. Use a different compiler." )
endif()

# -march=native tunes the binary for the CPU of the build machine, so the
# result may not run on other machines. It stays enabled by default to keep
# the previous behavior; pass -DFLEXBAR_NATIVE_ARCH=OFF for portable builds.
option( FLEXBAR_NATIVE_ARCH "Optimize for the CPU of the build machine (-march=native)" ON )
if( FLEXBAR_NATIVE_ARCH )
  set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native" )
endif()

if( CMAKE_SIZEOF_VOID_P EQUAL 8 )
  message( STATUS "Flexbar 64 bit architecture" )
else()
  message( STATUS "Flexbar 32 bit architecture" )
endif()

add_executable( flexbar Flexbar.cpp )

# TBB is linked by name and must be on the default library search path.
# The thread library is resolved by FindThreads instead of a raw -lpthread.
find_package( Threads REQUIRED )
target_link_libraries( flexbar tbb ${CMAKE_THREAD_LIBS_INIT} )

# Optional support for gzip compressed files.
find_package( ZLIB )
if( ZLIB_FOUND )
  include_directories( ${ZLIB_INCLUDE_DIRS} )
  target_link_libraries( flexbar ${ZLIB_LIBRARIES} )
  add_definitions( "-DSEQAN_HAS_ZLIB=1" )
else()
  message( STATUS "Build will not support zlib." )
endif()

# Optional support for bzip2 compressed files.
find_package( BZip2 )
if( BZIP2_FOUND )
  include_directories( ${BZIP2_INCLUDE_DIRS} )
  target_link_libraries( flexbar ${BZIP2_LIBRARIES} )
  add_definitions( "-DSEQAN_HAS_BZIP2=1" )
else()
  message( STATUS "Build will not support bzip2." )
endif()

# Append flexbar to the cached list of SeqAn CTD executables.
set( SEQAN_CTD_EXECUTABLES ${SEQAN_CTD_EXECUTABLES} flexbar CACHE INTERNAL "" )
flexbar-3.5.0/src/Flexbar.cpp 0000664 0000000 0000000 00000001625 13464300463 0016027 0 ustar 00root root 0000000 0000000 /*==================================================
Flexbar - flexible barcode and adapter removal
Version 3.5.0
BSD 3-Clause License
uses SeqAn library release 2.4.0
and TBB library 4.0 or later
Developer: Johannes Roehr
Former contributors: Matthias Dodt
Benjamin Menkuec
Sebastian Roskosch
https://github.com/seqan/flexbar
===================================================*/
#include "Flexbar.h"
int main(int argc, const char* argv[]){
using namespace std;
using namespace seqan;
const string version = "3.5.0";
const string date = "May 2019";
ArgumentParser parser("flexbar");
defineOptions(parser, version, date);
parseCmdLine(parser, version, argc, argv);
Options o;
initOptions(o, parser);
loadOptions(o, parser);
startComputation(o);
return 0;
}
flexbar-3.5.0/src/Flexbar.h 0000664 0000000 0000000 00000023501 13464300463 0015471 0 ustar 00root root 0000000 0000000 /*
* Flexbar.h
*
* Author: jtr
*/
#ifndef FLEXBAR_FLEXBAR_H
#define FLEXBAR_FLEXBAR_H
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
// #include
// #include
#include
#include "FlexbarTypes.h"
#include "Options.h"
#include "FlexbarIO.h"
#include "LoadFasta.h"
#include "LoadAdapters.h"
#include "SeqInput.h"
#include "PairedInput.h"
#include "PairedOutput.h"
#include "PairedAlign.h"
// Loads barcode sequences from a fasta file into the options object.
// With secondSet the file o.barcode2File is read and the result is stored in
// o.barcodes2, otherwise o.barcodeFile is read into o.barcodes.
// The loaded set is printed, and the program exits with status 1 if the
// file did not yield any barcodes.
template
void loadBarcodes(Options &o, const bool secondSet){
using namespace std;
using namespace flexbar;
string barFile = secondSet ? o.barcode2File : o.barcodeFile;
LoadFasta lf(o, false);
lf.loadSequences(barFile);
if(secondSet){
o.barcodes2 = lf.getBars();
lf.printBars("Barcode2");
if(o.barcodes2.size() == 0){
cerr << "\nERROR: No barcodes found in file.\n" << endl;
exit(1);
}
}
else{
o.barcodes = lf.getBars();
lf.printBars("Barcode");
if(o.barcodes.size() == 0){
cerr << "\nERROR: No barcodes found in file.\n" << endl;
exit(1);
}
}
}
// Loads adapter sequences into the options object and prints them.
// Three sources are handled:
//  - an adapter preset, if o.aPreset is not APOFF
//  - a fasta file, if useAdapterFile is set (o.adapter2File for the second
//    set, o.adapterFile otherwise); exits with status 1 if it has no adapters
//  - the sequence o.adapterSeq, added as is and/or as reverse complement
//    depending on o.rcMode
// secondSet selects o.adapters2 instead of o.adapters for presets and files.
template
void loadAdapters(Options &o, const bool secondSet, const bool useAdapterFile){
using namespace std;
using namespace flexbar;
if(o.aPreset != APOFF){
LoadAdapters la(o);
la.loadSequences(secondSet);
if(secondSet) o.adapters2 = la.getAdapters();
else o.adapters = la.getAdapters();
if(secondSet) la.printAdapters("Adapter2");
else la.printAdapters("Adapter");
}
else{
LoadFasta lf(o, true);
if(useAdapterFile){
string adapFile = secondSet ? o.adapter2File : o.adapterFile;
lf.loadSequences(adapFile);
if(secondSet){
o.adapters2 = lf.getBars();
if(o.adapters2.size() == 0){
cerr << "\nERROR: No adapters found in file.\n" << endl;
exit(1);
}
}
else{
o.adapters = lf.getBars();
if(o.adapters.size() == 0){
cerr << "\nERROR: No adapters found in file.\n" << endl;
exit(1);
}
}
}
else{
// Adapter sequence taken from o.adapterSeq; this branch always fills
// o.adapters, independent of secondSet.
if(o.rcMode == RCOFF || o.rcMode == RCON){
TBar bar;
bar.id = "cmdline";
bar.seq = o.adapterSeq;
o.adapters.push_back(bar);
}
if(o.rcMode == RCON || o.rcMode == RCONLY){
TSeqStr adapterSeqRC = o.adapterSeq;
seqan::reverseComplement(adapterSeqRC);
TBar barRC;
barRC.id = "cmdline_rc";
barRC.seq = adapterSeqRC;
o.adapters.push_back(barRC);
}
// Hand the adapters to the loader so that printBars below can show them.
lf.setBars(o.adapters);
}
if(secondSet) lf.printBars("Adapter2");
else lf.printBars("Adapter");
}
}
// Loads all barcode and adapter sets that the options ask for.
// Barcodes are loaded unless o.barDetect is BOFF; a second barcode set is
// loaded for WITHIN_READ2 and WITHIN_READ_REMOVAL2.
// Adapters are loaded unless o.adapRm is AOFF; a second adapter set is
// loaded for NORMAL2 and is always requested with useAdapterFile set.
template
void loadBarcodesAndAdapters(Options &o){
using namespace std;
using namespace flexbar;
if(o.barDetect != BOFF){
loadBarcodes(o, false);
if(o.barDetect == WITHIN_READ2 || o.barDetect == WITHIN_READ_REMOVAL2)
loadBarcodes(o, true);
}
if(o.adapRm != AOFF){
loadAdapters(o, false, o.useAdapterFile);
if(o.adapRm == NORMAL2)
loadAdapters(o, true, true);
}
}
// Writes the time elapsed since start to the log stream o.out, split into
// hours, minutes and seconds, followed by the processing speed in reads per
// second. The speed line is omitted if less than one second has elapsed.
void printComputationTime(Options &o, const time_t start, const unsigned long nReads){
	
	using namespace std;
	
	const time_t finish = time(NULL);
	const int elapsed   = int(difftime(finish, start));
	
	// Split the elapsed seconds into hours, minutes and seconds
	const div_t byHour   = div(elapsed, 3600);
	const div_t byMinute = div(byHour.rem, 60);
	
	const int hours   = byHour.quot;
	const int minutes = byMinute.quot;
	const int seconds = byMinute.rem;
	
	const bool showMinutes = hours > 0 || minutes > 0;
	const bool showSeconds = showMinutes || seconds > 0;
	
	ostream &log = *o.out;
	
	log << "Elapsed time: ";
	
	if(elapsed >= 1) log << " ";
	if(hours > 0)    log << hours << " h ";
	if(showMinutes)  log << minutes << " min ";
	
	if(showSeconds) log << seconds << " sec\n";
	else            log << "< 1 sec\n";
	
	if(elapsed >= 1){
		log << "Processing speed: " << nReads / elapsed << " reads/s\n\n" << endl;
	}
	else{
		log << "\n" << endl;
	}
}
// Returns value as decimal text, padded on the left with spaces up to a
// width of refLength characters. Text that is already as wide as or wider
// than refLength is returned without padding.
std::string alignValue(const int refLength, const unsigned long value){
	
	std::ostringstream digits;
	digits << value;
	
	const std::string text = digits.str();
	const int padding = refLength - static_cast<int>(text.length());
	
	if(padding <= 0) return text;
	
	return std::string(padding, ' ') + text;
}
// Writes the final status line to o.out, naming the processing steps that
// were active (barcode handling, adapter removal, or basic processing if
// neither was used). Closes the log file stream unless logging to stdout.
void printMessage(Options &o){
	
	using namespace std;
	using namespace flexbar;
	
	const bool withBarcodes = o.barDetect != BOFF;
	const bool withAdapters = o.adapRm != AOFF || o.poMode != POFF;
	
	string msg("Flexbar completed ");
	
	if(withBarcodes){
		msg += "barcode";
		
		if(o.barDetect == WITHIN_READ_REMOVAL) msg += " removal within reads";
		if(o.barDetect == WITHIN_READ)         msg += " detection within reads";
		if(o.barDetect == BARCODE_READ)        msg += " detection with separate reads";
		
		if(withAdapters) msg += " and ";
	}
	else if(! withAdapters){
		msg += "basic processing";
	}
	
	if(withAdapters) msg += "adapter removal";
	
	*o.out << msg << ".\n" << endl;
	
	if(! o.logStdout) closeFile(o.fstrmOut);
}
// Runs the read processing pipeline and prints the resulting statistics.
// An input, an alignment and an output filter are chained in a tbb pipeline.
// After the run, the elapsed time, the removal and overlap statistics, the
// file summary and the filtering statistics are written to o.out, followed
// by the final message of printMessage.
template
void startProcessing(Options &o){
using namespace std;
using namespace flexbar;
time_t start;
time(&start);
ostream *out = o.out;
*out << "\nProcessing reads ..." << flush;
if(o.logAlign != NONE) *out << "\n\nAlignment " << o.logAlignStr << " logging:\n\n" << endl;
PairedInput inputFilter(o);
PairedAlign alignFilter(o);
PairedOutput outputFilter(o);
// o.nThreads is given to both the task scheduler and the pipeline run.
tbb::task_scheduler_init init_serial(o.nThreads);
tbb::pipeline pipe;
pipe.add_filter(inputFilter);
pipe.add_filter(alignFilter);
pipe.add_filter(outputFilter);
pipe.run(o.nThreads);
if(o.logAlign == TAB) *out << "\n";
*out << "done.\n" << endl;
const unsigned long nReads = inputFilter.getNrProcessedReads();
printComputationTime(o, start, nReads);
// barcode and adapter removal statistics
if(o.writeLengthDist) outputFilter.writeLengthDist();
if(o.poMode != POFF) alignFilter.printPairOverlapStats();
if(o.adapRm != AOFF){
outputFilter.printAdapterRemovalStats();
alignFilter.printAdapterOverlapStats();
if(o.adapRm == NORMAL2){
outputFilter.printAdapterRemovalStats2();
alignFilter.printAdapterOverlapStats2();
}
}
outputFilter.printFileSummary();
// summary statistics of filtering
const unsigned long nChars = inputFilter.getNrProcessedChars();
const unsigned long uncalled = inputFilter.getNrUncalledReads();
const unsigned long uPairs = inputFilter.getNrUncalledPairedReads();
unsigned long nGoodReads = outputFilter.getNrGoodReads();
unsigned long nGoodChars = outputFilter.getNrGoodChars();
if(o.isPaired && o.writeSingleReadsP){
nGoodReads -= outputFilter.getNrSingleReads();
// NOTE(review): a read count is subtracted from the base count here;
// confirm whether a count of bases was intended.
nGoodChars -= outputFilter.getNrSingleReads();
}
// Width of the read count, used to right-align the values below.
stringstream s; s << nReads;
int len = s.str().length();
*out << "Filtering statistics\n";
*out << "====================\n";
*out << "Processed reads " << nReads << endl;
*out << " skipped due to uncalled bases ";
if(o.isPaired){
*out << alignValue(len, 2 * uPairs);
if(uncalled > 0)
*out << " (" << uncalled << " uncalled in " << uPairs << " pairs)";
*out << endl;
}
else *out << alignValue(len, uncalled) << endl;
if(o.qTrim != QOFF && ! o.qtrimPostRm)
*out << " trimmed due to low quality " << alignValue(len, inputFilter.getNrLowPhredReads()) << endl;
if(o.barDetect != BOFF && ! o.writeUnassigned)
*out << " skipped unassigned reads " << alignValue(len, alignFilter.getNrUnassignedReads()) << endl;
if(o.adapRm != AOFF || o.poMode != POFF)
*out << " short prior to adapter removal " << alignValue(len, alignFilter.getNrPreShortReads()) << endl;
if(o.qTrim != QOFF && o.qtrimPostRm)
*out << " trimmed due to low quality " << alignValue(len, outputFilter.getNrLowPhredReads()) << endl;
*out << " finally skipped short reads " << alignValue(len, outputFilter.getNrShortReads()) << endl;
if(o.isPaired && ! o.writeSingleReads && ! o.writeSingleReadsP)
*out << " skipped paired single reads " << alignValue(len, outputFilter.getNrSingleReads()) << endl;
*out << "Discarded reads overall " << alignValue(len, nReads - nGoodReads) << endl;
*out << "Remaining reads " << alignValue(len, nGoodReads);
// NOTE(review): both operands are unsigned long, so the percentage is an
// integer division and fixed/setprecision(2) do not change its output;
// confirm whether a fractional percentage was intended.
if(nReads > 0)
*out << " (" << fixed << setprecision(2) << 100 * nGoodReads / nReads << "%)";
// Width of the base count, used to right-align the base statistics.
stringstream schar; schar << inputFilter.getNrProcessedChars();
int clen = schar.str().length();
*out << "\n" << endl;
*out << "Processed bases " << alignValue(clen, nChars) << endl;
*out << "Remaining bases " << alignValue(clen, nGoodChars);
// NOTE(review): integer division as above.
if(nChars > 0)
*out << " (" << fixed << setprecision(2) << 100 * nGoodChars / nChars << "% of input)";
*out << "\n\n" << endl;
printMessage(o);
}
// Empty placeholder; its only call, in startComputation, is commented out.
void performTest(){
using namespace std;
using namespace seqan;
}
// Loads barcodes and adapters and starts the processing for the output
// compression type set in o.cmprsType.
// If gzip or bzip2 output is requested but the build lacks SEQAN_HAS_ZLIB or
// SEQAN_HAS_BZIP2, a notice is printed to stderr, the compression settings
// are reset and the processing runs with uncompressed output instead.
void startComputation(Options &o){
// performTest();
using namespace std;
using namespace flexbar;
loadBarcodesAndAdapters(o);
if(o.cmprsType == GZ){
#if SEQAN_HAS_ZLIB
startProcessing(o);
#else
o.outCompression = "";
o.cmprsType = UNCOMPRESSED;
cerr << "Output file compression inactive.\n"
<< "This build does not support zlib!\n" << endl;
#endif
}
else if(o.cmprsType == BZ2){
#if SEQAN_HAS_BZIP2
startProcessing(o);
#else
o.outCompression = "";
o.cmprsType = UNCOMPRESSED;
cerr << "Output file compression inactive.\n"
<< "This build does not support bzip2!\n" << endl;
#endif
}
// Also reached after one of the fallbacks above reset the type.
if(o.cmprsType == UNCOMPRESSED){
startProcessing(o);
}
}
#endif
flexbar-3.5.0/src/FlexbarIO.h 0000664 0000000 0000000 00000021602 13464300463 0015721 0 ustar 00root root 0000000 0000000 /*
* FlexbarIO.h
*
* Author: jtr
*/
#ifndef FLEXBAR_FLEXBARIO_H
#define FLEXBAR_FLEXBARIO_H
#include
#include
#include
#if SEQAN_HAS_ZLIB
#include
#endif
#if SEQAN_HAS_BZIP2
#include
#endif
// Opens the file at path for binary reading on strm.
// Prints an error to stderr and exits with status 1 if the stream is not
// in a good state afterwards.
void openInputFile(std::fstream &strm, std::string path){
	
	using namespace std;
	
	strm.open(path.c_str(), ios::in | ios::binary);
	
	if(strm.good()) return;
	
	cerr << "\nERROR: Could not open file " << path << "\n" << endl;
	exit(1);
}
// Opens the file at path for binary writing on strm.
// Prints an error to stderr and exits with status 1 if the stream is not
// in a good state afterwards.
void openOutputFile(std::fstream &strm, std::string path){
	
	using namespace std;
	
	strm.open(path.c_str(), ios::out | ios::binary);
	
	if(strm.good()) return;
	
	cerr << "\nERROR: Could not open file " << path << "\n" << endl;
	exit(1);
}
// Closes the given file stream.
void closeFile(std::fstream &strm){
strm.close();
}
// Extensions of the SeqAn sequence file types, so that files with the
// endings .dat and .txt are accepted, plus a custom output file type.
// Each custom format delegates record reading and writing to the regular
// Fasta and Fastq implementations.
namespace seqan{
// Extension for input fasta file with dat ending
struct DatFastaAdaptor_;
using DatFastaAdaptor = Tag;
// Specialize sequence input file with custom tag
using DatFastaSeqFileIn = FormattedFile;
// Your custom format tag
struct DatFastaSeqFormat_;
using DatFastaSeqFormat = Tag;
// The extended TagList containing our custom format
using DatFastaSeqInFormats = TagList;
// Overloaded file format metafunction
template <>
struct FileFormat >{
using Type = TagSelector;
};
// Set magic header
template
struct MagicHeader : public MagicHeader{};
// Specify the valid ending for your fasta adaptor
template
struct FileExtensions{
static char const * VALUE[1];
};
template
char const * FileExtensions::VALUE[1] = { ".dat" };
// Overload inner readRecord function
template
inline void
readRecord(TIdString & id, TSeqString & seq, FormattedFile & file, DatFastaSeqFormat){
readRecord(id, seq, file.iter, Fasta()); // Delegate to Fasta parser
}
// Extension for input fastq file with dat ending
struct DatFastqAdaptor_;
using DatFastqAdaptor = Tag;
// Specialize sequence input file with custom tag
using DatFastqSeqFileIn = FormattedFile;
// Your custom format tag
struct DatFastqSeqFormat_;
using DatFastqSeqFormat = Tag;
// The extended TagList containing our custom format
using DatFastqSeqInFormats = TagList;
// Overloaded file format metafunction
template <>
struct FileFormat >{
using Type = TagSelector;
};
// Set magic header
template
struct MagicHeader : public MagicHeader{};
// Specify the valid ending for your fastq adaptor
template
struct FileExtensions{
static char const * VALUE[1];
};
template
char const * FileExtensions::VALUE[1] = { ".dat" };
// Overload inner readRecord function, variant with quality string
template
inline void
readRecord(TIdString & id, TSeqString & seq, TIdString & qual, FormattedFile & file, DatFastqSeqFormat){
readRecord(id, seq, qual, file.iter, Fastq()); // Delegate to Fastq parser
}
// Variant without quality string
template
inline void
readRecord(TIdString & id, TSeqString & seq, FormattedFile & file, DatFastqSeqFormat){
readRecord(id, seq, file.iter, Fasta()); // Delegate to Fasta parser
}
// Extension for input fastq file with txt ending
struct FlexbarReadsAdaptor_;
using FlexbarReadsAdaptor = Tag;
// Specialize sequence input file with custom tag
using FlexbarReadsSeqFileIn = FormattedFile;
// Your custom format tag
struct FlexbarReadsSeqFormat_;
using FlexbarReadsSeqFormat = Tag;
// The extended TagList containing our custom format
using FlexbarReadsSeqInFormats = TagList;
// Overloaded file format metafunction
template <>
struct FileFormat >{
using Type = TagSelector;
};
// Set magic header
template
struct MagicHeader : public MagicHeader{};
// Specify the valid ending for your fastq adaptor
template
struct FileExtensions{
static char const * VALUE[1];
};
template
char const * FileExtensions::VALUE[1] = { ".txt" };
// Overload inner readRecord function, variant with quality string
template
inline void
readRecord(TIdString & id, TSeqString & seq, TIdString & qual, FormattedFile & file, FlexbarReadsSeqFormat){
readRecord(id, seq, qual, file.iter, Fastq()); // Delegate to Fastq parser
}
// Variant without quality string
template
inline void
readRecord(TIdString & id, TSeqString & seq, FormattedFile & file, FlexbarReadsSeqFormat){
readRecord(id, seq, file.iter, Fasta()); // Delegate to Fasta parser
}
// Extension for output reads file with dat ending
using FlexbarReadsSeqFileOut = FormattedFile;
using FlexbarReadsSeqOutFormats = TagList;
template <>
struct FileFormat >{
using Type = TagSelector;
};
// Inner writeRecord function, variant with quality string
template
inline void
writeRecord(FormattedFile & file, TIdString & id, TSeqString & seq, TIdString & qual){
writeRecord(file.iter, id, seq, qual, Fastq()); // Delegate to Fastq writer
}
// Variant without quality string
template
inline void
writeRecord(FormattedFile & file, TIdString & id, TSeqString & seq){
writeRecord(file.iter, id, seq, Fasta()); // Delegate to Fasta writer
}
}
// Checks whether this build can read the file at path, judged by its ending.
// A path longer than three characters that ends in ".gz" requires zlib
// support (SEQAN_HAS_ZLIB); otherwise a path longer than four characters
// that ends in ".bz2" requires bzip2 support (SEQAN_HAS_BZIP2).
// If the required support is missing, a message is printed to stderr and the
// program exits with status 1. In all other cases the function returns
// without any effect.
void checkFileCompression(const std::string path){
	
	using namespace std;
	
	const size_t len = path.length();
	
	if(len > 3 && path.compare(len - 3, 3, ".gz") == 0){
		
		#if !SEQAN_HAS_ZLIB
			cerr << "\nInput file decompression canceled.\n";
			cerr << "This build does not support zlib.\n" << endl;
			exit(1);
		#endif
	}
	else if(len > 4 && path.compare(len - 4, 4, ".bz2") == 0){
		
		#if !SEQAN_HAS_BZIP2
			cerr << "\nInput file decompression canceled.\n";
			cerr << "This build does not support bzip2.\n" << endl;
			exit(1);
		#endif
	}
}
void checkInputType(const std::string path, flexbar::FileFormat &format, const bool isReadsFile){
using namespace std;
using namespace flexbar;
checkFileCompression(path);
if(path == "-" && isReadsFile){
char c;
if(cin) c = cin.peek();
else{
cerr << "\nERROR: Could not read from standard input stream.\n" << endl;
exit(1);
}
if(c == '>') format = FASTA;
else if(c == '@') format = FASTQ;
else{
cerr << "\nERROR: Format of reads from standard input not conform.\n";
cerr << "Use uncompressed fasta or fastq for stdin.\n" << endl;
exit(1);
}
}
else{
seqan::FlexbarReadsSeqFileIn seqFileIn;
if(! open(seqFileIn, path.c_str())){
cerr << "\nERROR: Could not open file " << path << "\n" << endl;
exit(1);
}
try{
if(! atEnd(seqFileIn)){
FString id, seq, qual;
readRecord(id, seq, qual, seqFileIn);
if(qual == "") format = FASTA;
else format = FASTQ;
}
else{
cerr << "\nReads file seems to be empty.\n\n" << endl;
close(seqFileIn);
exit(1);
}
}
catch(seqan::Exception const &e){
cerr << "\nERROR: " << e.what() << "\nProgram execution aborted.\n" << endl;
close(seqFileIn);
exit(1);
}
close(seqFileIn);
}
}
// Returns the file name extension for the given format:
// ".fasta" for FASTA and ".fastq" for any other value.
std::string getExtension(const flexbar::FileFormat format){
	
	return (format == flexbar::FASTA) ? ".fasta" : ".fastq";
}
// void runQualityCheck(std::string path){
//
// using namespace std;
//
// if(! system(NULL)) exit(EXIT_FAILURE);
//
// string call = "qcCommand " + path + " &> qc.out";
//
// if(system(call.c_str()) != 0){
// cerr << "\nERROR: quality control program execution.\n" << endl;
// }
// }
#endif
flexbar-3.5.0/src/FlexbarTypes.h 0000664 0000000 0000000 00000007104 13464300463 0016517 0 ustar 00root root 0000000 0000000 // FlexbarTypes.h
#ifndef FLEXBAR_FLEXBARTYPES_H
#define FLEXBAR_FLEXBARTYPES_H
// A single read: sequence, identifier, optional quality string and a umi
// string, together with flags that are all initialized to false.
// NOTE(review): the flag names suggest they record adapter removal and pair
// overlap results; they are set outside this header - confirm their meaning
// against the code that sets them.
template
class SeqRead {
public:
TSeqStr seq;
TString id, qual, umi;
bool rmAdapter, rmAdapterRC, pairOverlap, poRemoval;
// Read without quality string; qual and umi stay default-constructed.
SeqRead(TSeqStr& sequence, TString& seqID) :
seq(sequence),
id(seqID),
rmAdapter(false),
rmAdapterRC(false),
pairOverlap(false),
poRemoval(false){
}
// Read with quality string; umi stays default-constructed.
SeqRead(TSeqStr& sequence, TString& seqID, TString& quality) :
seq(sequence),
id(seqID),
qual(quality),
rmAdapter(false),
rmAdapterRC(false),
pairOverlap(false),
poRemoval(false){
}
};
// Bundles up to three reads (r1, r2 and b) with two barcode ids.
// The object takes ownership of the pointers passed to the constructor and
// deletes them in its destructor. It is therefore not copyable: a copy would
// share the pointers and delete them a second time.
template
class PairedRead {
	
	typedef SeqRead TSeqRead;
	
public:
	
	TSeqRead *r1, *r2, *b;
	unsigned int barID, barID2;
	
	// Takes ownership of p_r1, p_r2 and p_b; null pointers are allowed,
	// since deleting a null pointer has no effect. Both ids start at 0.
	PairedRead(TSeqRead *p_r1, TSeqRead *p_r2, TSeqRead *p_b) :
		r1(p_r1),
		r2(p_r2),
		b(p_b),
		barID(0),
		barID2(0){
	}
	
	// Copying is disabled to prevent a double delete of the owned reads.
	PairedRead(const PairedRead&) = delete;
	PairedRead& operator=(const PairedRead&) = delete;
	
	virtual ~PairedRead(){
		delete r1;
		delete r2;
		delete b;
	}
};
// Plain record holding the outcome of one alignment: score, error counts,
// positions, lengths, the allowed error budget, a UMI tag and a printable
// alignment string.
// NOTE(review): the template parameter list is missing in this copy of the
// file; TSeqStr is presumably the template parameter - confirm.
template
struct AlignResults{
// Alignment score and error counts.
int score, mismatches, gapsR, gapsA;
// Start positions.
int startPos, startPosA, startPosS;
// End positions.
int endPos, endPosS, endPosA;
// Length values.
int overlapLength, queryLength, tailLength;
float allowedErrors;
TSeqStr umiTag;
std::string alString;
// The empty constructor leaves all int and float members uninitialised;
// callers have to assign them before reading.
AlignResults(){}
};
// Shared type aliases, small records and option enumerations of Flexbar.
// NOTE(review): the template argument lists of the typedefs below are missing
// in this copy of the file (e.g. "seqan::StringSet" without element type).
namespace flexbar{
// Upper bound for read lengths.
const unsigned int MAX_READLENGTH = 2048;
// Sequence and plain string types.
typedef seqan::Dna5String FSeqStr;
typedef seqan::CharString FString;
// Collections of sequences, strings and flags.
typedef seqan::StringSet TSeqStrs;
typedef seqan::StringSet TStrings;
typedef seqan::StringSet TBools;
// Read types.
typedef SeqRead TSeqRead;
typedef PairedRead TPairedRead;
// Alignment types.
typedef seqan::Align TAlign;
typedef seqan::StringSet TAlignSet;
typedef seqan::String TAlignScores;
// Set of alignments together with their scores.
struct Alignments {
TAlignSet aset;
TAlignScores ascores;
};
// Bundles processed as one unit.
typedef std::vector TAlignBundle;
typedef std::vector TPairedReadBundle;
// typedef seqan::StringSet > TAlignSet;
// struct SeqReadData {
// TSeqStrs seqs;
// TStrings ids, quals;
// TBools uncalled;
//
// SeqReadData(){}
// };
// struct PairedReadBundle {
// SeqReadData srd, srd2, srdBR;
// TPairedReads pReads;
//
// PairedReadBundle(){}
// };
// Barcode or adapter entry: name, sequence, reverse-complement flag and two
// atomic counters (presumably removal statistics - confirm).
struct TBar {
FString id;
FSeqStr seq;
bool rcAdapter;
tbb::atomic rmOverlap, rmFull;
// Counters start at 0 and rcAdapter at false. Members are initialised in
// declaration order (rcAdapter first), not in the order listed here.
TBar() :
rmOverlap(0),
rmFull(0),
rcAdapter(false){
}
};
// Adapter preset record: name, description and up to three sequences.
struct Adapters {
FString id, info;
FSeqStr seq1, seq2, seqc;
};
// Built-in adapter presets; APOFF is the default set by Options().
enum AdapterPreset {
APOFF,
TRUSEQ,
SMALLRNA,
METHYL,
RIBO,
NEXTERA,
NEXTERAMP
};
// Pair overlap detection mode; POFF is the default set by Options().
enum PairOverlap {
POFF,
PON,
PSHORT,
PONLY
};
// Reverse complement handling for adapters: off, in addition, or only.
enum RevCompMode {
RCOFF,
RCON,
RCONLY
};
// Alignment mode with respect to reverse complement adapters.
enum AlignmentMode {
ALIGNALL,
ALIGNRCOFF,
ALIGNRC
};
// Stage of a computation cycle.
enum ComputeCycle {
PRELOAD,
COMPUTE,
RESULTS
};
// Alignment logging mode; NONE is the default set by Options().
enum LogAlign {
NONE,
ALL,
TAB,
MOD
};
// Compression of output files.
enum CompressionType {
UNCOMPRESSED,
GZ,
BZ2
};
// Trim-end modes for barcode and adapter removal.
enum TrimEnd {
ANY,
LEFT,
RIGHT,
LTAIL,
RTAIL
};
// Format of read files.
enum FileFormat {
FASTA,
FASTQ
};
// Quality encoding of reads.
enum QualityType {
SANGER,
SOLEXA,
ILLUMINA
};
// Quality-based trimming mode; QOFF is the default set by Options().
enum QualTrimType {
QOFF,
TAIL,
WIN,
WINTAIL,
BWA
};
// Barcode detection mode; BOFF is the default set by Options().
enum BarcodeDetect {
BARCODE_READ,
WITHIN_READ,
WITHIN_READ_REMOVAL,
WITHIN_READ2,
WITHIN_READ_REMOVAL2,
BOFF
};
// Adapter removal mode; AOFF is the default set by Options().
enum AdapterRemoval {
NORMAL,
NORMAL2,
AONE,
ATWO,
AOFF
};
// Output selection for adapter-trimmed reads; ATON is the default.
enum AdapterTrimmed {
ATON,
ATOFF,
ATONLY
};
// Kind of run: single or paired reads, with or without barcodes.
enum RunType {
SINGLE,
PAIRED,
SINGLE_BARCODED,
PAIRED_BARCODED
};
}
#endif
flexbar-3.5.0/src/LoadAdapters.h 0000664 0000000 0000000 00000007746 13464300463 0016466 0 ustar 00root root 0000000 0000000 // LoadAdapters.h
#ifndef FLEXBAR_LOADADAPTERS_H
#define FLEXBAR_LOADADAPTERS_H
// Provides adapter sequences from a built-in preset. The constructor selects
// the preset record, loadSequences() turns it into TBar entries (optionally
// with reverse complements) and printAdapters() writes them to the log stream.
// NOTE(review): the template parameter list and the element type of the
// concurrent_vector are missing in this copy of the file; TSeqStr and TString
// are presumably the template parameters - confirm.
template
class LoadAdapters {
private:
// Log stream taken from the options.
std::ostream *out;
// Adapters collected by loadSequences().
tbb::concurrent_vector adapters;
// Preset record chosen in the constructor; stays empty for APOFF.
flexbar::Adapters a;
const flexbar::AdapterPreset m_aPreset;
const flexbar::RevCompMode m_rcMode;
public:
// Stores stream, preset and reverse complement mode from the options and
// fills the preset record according to the preset.
LoadAdapters(const Options &o) :
out(o.out),
m_aPreset(o.aPreset),
m_rcMode(o.rcMode){
using namespace flexbar;
// Illumina sequencing adapters
// Oligonucleotide sequences © 2018 Illumina, Inc. All rights reserved.
// Obtained from https://support.illumina.com/bulletins/2016/12/what-sequences-do-i-use-for-adapter-trimming.html
if(m_aPreset == TRUSEQ){
a.id = "TruSeq";
a.seq1 = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA";
a.seq2 = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT";
a.info = "TruSeq LT and TruSeq HT-based kits";
}
// NOTE(review): the ids of the next three presets are spelled "TrueSeq"
// while info strings and the first preset use "TruSeq" - confirm intent.
else if(m_aPreset == METHYL){
a.id = "TrueSeq-Methyl";
a.seq1 = "AGATCGGAAGAGCACACGTCTGAAC";
a.seq2 = "AGATCGGAAGAGCGTCGTGTAGGGA";
a.info = "ScriptSeq and TruSeq DNA Methylation";
}
else if(m_aPreset == SMALLRNA){
a.id = "TrueSeq-smallRNA";
a.seq1 = "TGGAATTCTCGGGTGCCAAGG";
a.info = "TruSeq Small RNA";
}
else if(m_aPreset == RIBO){
a.id = "TrueSeq-Ribo";
a.seq1 = "AGATCGGAAGAGCACACGTCT";
a.info = "TruSeq Ribo Profile";
}
else if(m_aPreset == NEXTERA){
a.id = "Nextera-TruSight";
a.seq1 = "CTGTCTCTTATACACATCT";
a.info = "AmpliSeq, Nextera, Nextera DNA Flex, Nextera DNA, Nextera XT, Nextera Enrichment, Nextera Rapid Capture Enrichment, TruSight Enrichment, TruSight Rapid Capture Enrichment, TruSight HLA";
}
else if(m_aPreset == NEXTERAMP){
a.id = "Nextera-Matepair";
a.seq1 = "GATCGGAAGAGCACACGTCTGAACTCCAGTCAC";
a.seq2 = "GATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT";
a.seqc = "CTGTCTCTTATACACATCT";
a.info = "Nextera Mate Pair";
}
// IonTorrent sequencing adapters
// NOTE(review): this local record is filled but never used afterwards, and
// seq1 is assigned twice so the first value is overwritten - confirm whether
// this is an unfinished preset.
Adapters IonTorrent;
IonTorrent.id = "IonTorrent";
IonTorrent.seq1 = "ATCACCGACTGCCCATAGAGAGGCTGAGAC";
IonTorrent.seq1 = "CCATCTCATCCCTGCGTGTCTCCGACTCAG";
IonTorrent.seq2 = "CCTCTCTATGGGCAGTCGGTGAT";
IonTorrent.info = "IonTorrent";
};
virtual ~LoadAdapters(){};
// Appends the adapters of the preset to the internal list. secondSet
// selects seq2 instead of seq1 of the preset record.
void loadSequences(const bool secondSet){
using namespace std;
using namespace flexbar;
TString id = a.id;
TSeqStr seq;
if(! secondSet) seq = a.seq1;
else seq = a.seq2;
// Forward adapter, unless only reverse complements are requested.
if(m_rcMode == RCOFF || m_rcMode == RCON){
TBar adapter;
adapter.id = id;
adapter.seq = seq;
adapters.push_back(adapter);
}
// Reverse complement, named "<id>_rc" and flagged as rcAdapter.
if(m_rcMode == RCON || m_rcMode == RCONLY){
TString idRC = id;
TSeqStr seqRC = seq;
append(idRC, "_rc");
seqan::reverseComplement(seqRC);
TBar adapterRC;
adapterRC.id = idRC;
adapterRC.seq = seqRC;
adapterRC.rcAdapter = true;
adapters.push_back(adapterRC);
}
// The mate pair preset additionally adds seqc as "<id>_circ" and its reverse
// complement as "<id>_circ_rc", independent of the reverse complement mode.
// NOTE(review): rcAdapter is left false for the "_circ_rc" entry - confirm.
if(m_aPreset == NEXTERAMP){
TString idc = id;
TSeqStr seqc = a.seqc;
append(idc, "_circ");
TBar adapter;
adapter.id = idc;
adapter.seq = seqc;
adapters.push_back(adapter);
append(idc, "_rc");
seqan::reverseComplement(seqc);
TBar adapterRC;
adapterRC.id = idc;
adapterRC.seq = seqc;
adapters.push_back(adapterRC);
}
};
// Returns a copy of the collected adapters.
tbb::concurrent_vector getAdapters(){
return adapters;
}
// Writes a two-column table (name, sequence) of the adapters to the log
// stream, headed by adapterName. Names are padded to a column width of 23
// characters with at least two spaces of separation.
void printAdapters(std::string adapterName) const {
using namespace std;
const unsigned int maxSpaceLen = 23;
stringstream s; s << adapterName;
// Header length including the trailing colon.
int len = s.str().length() + 1;
// Cap the length so that the padding below is at least two spaces.
if(len + 2 > maxSpaceLen) len = maxSpaceLen - 2;
*out << adapterName << ":" << string(maxSpaceLen - len, ' ') << "Sequence:" << "\n";
for(unsigned int i=0; i < adapters.size(); ++i){
TString seqTag = adapters.at(i).id;
int whiteSpaceLen = maxSpaceLen - length(seqTag);
if(whiteSpaceLen < 2) whiteSpaceLen = 2;
string whiteSpace = string(whiteSpaceLen, ' ');
*out << seqTag << whiteSpace << adapters.at(i).seq << "\n";
}
*out << endl;
}
};
#endif
flexbar-3.5.0/src/LoadFasta.h 0000664 0000000 0000000 00000005247 13464300463 0015753 0 ustar 00root root 0000000 0000000 // LoadFasta.h
#ifndef FLEXBAR_LOADFASTA_H
#define FLEXBAR_LOADFASTA_H
// Loads barcodes or adapters from a fasta file into a list of TBar entries
// and prints them to the log stream.
// NOTE(review): the template parameter list, the element type of the
// concurrent_vector and the key/value types of the map are missing in this
// copy of the file; TSeqStr and TString are presumably the template
// parameters - confirm.
template
class LoadFasta {
private:
// Log stream taken from the options.
std::ostream *out;
// Entries collected by loadSequences() or set via setBars().
tbb::concurrent_vector bars;
// True when the file holds adapters, false for barcodes.
const bool m_isAdapter;
const flexbar::RevCompMode m_rcMode;
public:
// Stores stream and reverse complement mode from the options.
// Members are initialised in declaration order (m_isAdapter before
// m_rcMode), not in the order listed here.
LoadFasta(const Options &o, const bool isAdapter) :
out(o.out),
m_rcMode(o.rcMode),
m_isAdapter(isAdapter){
};
virtual ~LoadFasta(){};
// Reads all records of the fasta file at filePath and appends them to the
// internal list. Terminates the program with exit(1) when the file cannot
// be opened, when two records share a name or when reading throws.
void loadSequences(const std::string filePath){
using namespace std;
using namespace flexbar;
seqan::DatFastaSeqFileIn seqFileIn;
if(! open(seqFileIn, filePath.c_str())){
cerr << "\nERROR: Could not open file " << filePath << "\n" << endl;
exit(1);
}
try{
TSeqStrs seqs;
TStrings ids;
readRecords(ids, seqs, seqFileIn);
// Names seen so far, used to reject duplicates.
map idMap;
for(unsigned int i = 0; i < length(ids); ++i){
if(idMap.count(ids[i]) == 1){
cerr << "Two ";
if(m_isAdapter) cerr << "adapters";
else cerr << "barcodes";
cerr << " have the same name.\n";
cerr << "Please use unique names and restart.\n" << endl;
// NOTE(review): unlike the catch block, this path exits without close().
exit(1);
}
else idMap[ids[i]] = 1;
// Barcodes are always added as given; adapters only when the forward
// orientation is requested.
if(! m_isAdapter || m_rcMode == RCOFF || m_rcMode == RCON){
TBar bar;
bar.id = ids[i];
bar.seq = seqs[i];
bars.push_back(bar);
}
// Reverse complement of an adapter, named "<id>_rc" and flagged.
if(m_isAdapter && (m_rcMode == RCON || m_rcMode == RCONLY)){
TString id = ids[i];
TSeqStr seq = seqs[i];
append(id, "_rc");
seqan::reverseComplement(seq);
TBar barRC;
barRC.id = id;
barRC.seq = seq;
barRC.rcAdapter = true;
bars.push_back(barRC);
}
}
}
catch(seqan::Exception const &e){
cerr << "\nERROR: " << e.what() << "\nProgram execution aborted.\n" << endl;
close(seqFileIn);
exit(1);
}
close(seqFileIn);
};
// Returns a copy of the loaded entries.
tbb::concurrent_vector getBars(){
return bars;
}
// Replaces the loaded entries by a copy of newBars.
void setBars(tbb::concurrent_vector &newBars){
bars = newBars;
}
// Writes a two-column table (name, sequence) of the entries to the log
// stream, headed by adapterName. Names are padded to a column width of 23
// characters with at least two spaces of separation.
void printBars(std::string adapterName) const {
using namespace std;
const unsigned int maxSpaceLen = 23;
stringstream s; s << adapterName;
// Header length including the trailing colon.
int len = s.str().length() + 1;
// Cap the length so that the padding below is at least two spaces.
if(len + 2 > maxSpaceLen) len = maxSpaceLen - 2;
*out << adapterName << ":" << string(maxSpaceLen - len, ' ') << "Sequence:" << "\n";
for(unsigned int i=0; i < bars.size(); ++i){
TString seqTag = bars.at(i).id;
int whiteSpaceLen = maxSpaceLen - length(seqTag);
if(whiteSpaceLen < 2) whiteSpaceLen = 2;
string whiteSpace = string(whiteSpaceLen, ' ');
*out << seqTag << whiteSpace << bars.at(i).seq << "\n";
}
*out << endl;
}
};
#endif
flexbar-3.5.0/src/Options.h 0000664 0000000 0000000 00000124447 13464300463 0015554 0 ustar 00root root 0000000 0000000 /*
* Options.h
*
* Author: jtr
*/
#ifndef FLEXBAR_OPTIONS_H
#define FLEXBAR_OPTIONS_H
#include
#include "FlexbarIO.h"
// Collects all run-time settings of Flexbar: file names, switches, numeric
// parameters, mode enumerations, loaded barcodes/adapters and the log stream.
// NOTE(review): the element type of the concurrent_vector members is missing
// in this copy of the file.
struct Options{
// Input, output and auxiliary file names plus string-valued settings.
std::string readsFile, readsFile2, barReadsFile;
std::string outReadsFile, outReadsFile2, outLogFile;
std::string barcodeFile, adapterFile, barcode2File, adapter2File;
std::string adapterSeq, targetName, logAlignStr, outCompression;
std::string htrimLeft, htrimRight;
// Boolean switches; the constructor sets every one of them to false.
bool isPaired, useAdapterFile, useNumberTag, useRemovalTag, umiTags, logStdout;
bool switch2Fasta, writeUnassigned, writeSingleReads, writeSingleReadsP, writeLengthDist;
bool useStdin, useStdout, relaxRegion, useRcTrimEnd, qtrimPostRm, addBarcodeAdapter;
bool interleavedInput, iupacInput, htrimAdapterRm, htrimMaxFirstOnly;
// Numeric parameters.
// NOTE(review): the constructor does not assign p_min_overlap, a_overhang,
// htrimMinLength, maxUncalled, min_readLen, nThreads, bundleSize, a_cycles,
// the a_/b_ match, mismatch and gapCost scores, the three error rates,
// runType and the out pointer. They hold indeterminate values until set
// elsewhere - confirm every one is assigned before use.
int cutLen_begin, cutLen_end, cutLen_read, a_tail_len, b_tail_len, p_min_overlap;
int qtrimThresh, qtrimWinSize, a_overhang, htrimMinLength, htrimMinLength2, htrimMaxLength;
int maxUncalled, min_readLen, a_min_overlap, b_min_overlap, nThreads, bundleSize, nBundles;
int a_match, a_mismatch, a_gapCost, b_match, b_mismatch, b_gapCost, a_cycles;
float a_errorRate, b_errorRate, h_errorRate;
// Mode enumerations, see namespace flexbar.
flexbar::TrimEnd a_end, b_end, arc_end;
flexbar::FileFormat format;
flexbar::QualityType qual;
flexbar::QualTrimType qTrim;
flexbar::LogAlign logAlign;
flexbar::CompressionType cmprsType;
flexbar::RunType runType;
flexbar::BarcodeDetect barDetect;
flexbar::AdapterRemoval adapRm;
flexbar::RevCompMode rcMode;
flexbar::PairOverlap poMode;
flexbar::AdapterPreset aPreset;
flexbar::AdapterTrimmed aTrimmed;
// Barcode and adapter sets for first and second read set.
tbb::concurrent_vector barcodes, adapters, barcodes2, adapters2;
// Log stream; initOptions() points it at std::cout or at fstrmOut.
std::ostream *out;
std::fstream fstrmOut;
// Sets the defaults listed below; see the note above for members that are
// not covered.
Options(){
using namespace flexbar;
readsFile = "";
readsFile2 = "";
barReadsFile = "";
barcodeFile = "";
adapterFile = "";
barcode2File = "";
adapter2File = "";
outReadsFile = "";
outReadsFile2 = "";
outLogFile = "";
outCompression = "";
htrimLeft = "";
htrimRight = "";
isPaired = false;
useAdapterFile = false;
useNumberTag = false;
useRemovalTag = false;
writeUnassigned = false;
writeSingleReads = false;
writeSingleReadsP = false;
writeLengthDist = false;
switch2Fasta = false;
logStdout = false;
umiTags = false;
interleavedInput = false;
iupacInput = false;
useStdin = false;
useStdout = false;
relaxRegion = false;
useRcTrimEnd = false;
addBarcodeAdapter = false;
qtrimPostRm = false;
htrimAdapterRm = false;
htrimMaxFirstOnly = false;
cutLen_begin = 0;
cutLen_end = 0;
cutLen_read = 0;
qtrimThresh = 0;
qtrimWinSize = 0;
a_tail_len = 0;
b_tail_len = 0;
a_min_overlap = 3;
b_min_overlap = 0;
htrimMinLength2 = 0;
htrimMaxLength = 0;
nBundles = 0;
format = FASTA;
qual = SANGER;
qTrim = QOFF;
logAlign = NONE;
cmprsType = UNCOMPRESSED;
barDetect = BOFF;
adapRm = AOFF;
rcMode = RCOFF;
poMode = POFF;
a_end = RIGHT;
arc_end = RIGHT;
b_end = LTAIL;
aPreset = APOFF;
aTrimmed = ATON;
}
};
// Builds the program banner: ASCII art logo, program title followed by the
// given version, and a closing line naming SeqAn.
const std::string getFlexbarBanner(const seqan::CharString version){
	std::string text(
		" ________ __ \n"
		" / ____/ /__ _ __/ /_ ____ ______\n"
		" / /_ / / _ \\| |/ / __ \\/ __ `/ ___/\n"
		" / __/ / / __/> /_/ / /_/ / / \n"
		" /_/ /_/\\___/_/|_/_.___/\\__._/_/ \n\n"
		"Flexbar - flexible barcode and adapter removal, version ");
	// The version is appended between title and closing line.
	append(text, version);
	text += "\nDeveloped with SeqAn, the library for sequence analysis\n";
	return text;
}
// Returns the two literature references of Flexbar as multi-line text.
const std::string getFlexbarCitation(){
	static const char* const citation =
		"Johannes T. Roehr, Christoph Dieterich, Knut Reinert:\n"
		"Flexbar 3.0 - SIMD and multicore parallelization. Bioinformatics 2017.\n"
		"\n"
		"Matthias Dodt, Johannes T. Roehr, Rina Ahmed, Christoph Dieterich:\n"
		"Flexbar - flexible barcode and adapter processing for next-generation\n"
		"sequencing platforms. Biology 2012.\n";
	return citation;
}
// Returns the one-line note telling where Flexbar is available.
const std::string getFlexbarURL(){
	const std::string url("Available on github.com/seqan/flexbar\n");
	return url;
}
// Returns the program description shown in the help output, as one
// paragraph without line breaks.
const std::string getFlexbarDescription(){
	static const char* const description =
		"The program Flexbar preprocesses high-throughput sequencing data efficiently. "
		"It demultiplexes barcoded runs and removes adapter sequences. "
		"Several adapter removal presets for Illumina libraries are included. "
		"Flexbar computes exact overlap alignments using SIMD and multicore parallelism. "
		"Moreover, trimming and filtering features are provided, e.g. trimming of homopolymers at read ends. "
		"Flexbar increases read mapping rates and improves genome as well as transcriptome assemblies. "
		"Unique molecular identifiers can be extracted in a flexible way. "
		"The software supports data in fasta and fastq format from multiple sequencing platforms. "
		"Refer to the manual on github.com/seqan/flexbar/wiki or contact Johannes Roehr on github.com/jtroehr for support with this application.";
	return description;
}
// Registers the complete command line interface of Flexbar on the parser:
// version and date, description and usage line, all options grouped into
// sections, the advanced flags, valid values, default values and the help
// text sections on trim-end modes and examples.
//   parser   argument parser to configure
//   version  program version stored in the parser
//   date     release date stored in the parser
// The order of the addSection/addOption calls is the order of registration
// on the parser.
void defineOptions(seqan::ArgumentParser &parser, const std::string version, const std::string date){
using namespace seqan;
typedef ArgParseArgument ARG;
setVersion(parser, version);
setDate(parser, date);
// setAppName(parser, "Flexbar");
// setCitation(parser, "\n\n" + getFlexbarCitation());
// setShortCopyright(parser, "BSD 3-Clause License");
// setLongCopyright(parser, "");
addDescription(parser, getFlexbarDescription());
setShortDescription(parser, "flexible barcode and adapter removal");
addUsageLine(parser, "\\fB-r\\fP reads [\\fB-b\\fP barcodes] [\\fB-a\\fP adapters] [options]");
// General informational switches.
addOption(parser, ArgParseOption("hm", "man-help", "Print advanced options as man document."));
addOption(parser, ArgParseOption("v", "versions", "Print Flexbar and SeqAn version numbers."));
addOption(parser, ArgParseOption("c", "cite", "Show program references for citation."));
addSection(parser, "Basic options");
addOption(parser, ArgParseOption("n", "threads", "Number of threads to employ.", ARG::INTEGER));
addOption(parser, ArgParseOption("N", "bundle", "Number of (paired) reads per thread.", ARG::INTEGER));
addOption(parser, ArgParseOption("M", "bundles", "Process only certain number of bundles for testing.", ARG::INTEGER));
addOption(parser, ArgParseOption("t", "target", "Prefix for output file names or paths.", ARG::OUTPUT_PREFIX));
addOption(parser, ArgParseOption("r", "reads", "Fasta/q file or stdin (-) with reads that may contain barcodes.", ARG::INPUT_FILE));
addOption(parser, ArgParseOption("p", "reads2", "Second input file of paired reads, gz and bz2 files supported.", ARG::INPUT_FILE));
addOption(parser, ArgParseOption("i", "interleaved", "Interleaved format for first input set with paired reads."));
addOption(parser, ArgParseOption("I", "iupac", "Accept iupac symbols in reads and convert to N if not ATCG."));
addSection(parser, "Barcode detection");
addOption(parser, ArgParseOption("b", "barcodes", "Fasta file with barcodes for demultiplexing, may contain N.", ARG::INPUT_FILE));
addOption(parser, ArgParseOption("b2", "barcodes2", "Additional barcodes file for second read set in paired mode.", ARG::INPUT_FILE));
addOption(parser, ArgParseOption("br", "barcode-reads", "Fasta/q file containing separate barcode reads for detection.", ARG::INPUT_FILE));
addOption(parser, ArgParseOption("bo", "barcode-min-overlap", "Minimum overlap of barcode and read. Default: barcode length.", ARG::INTEGER));
addOption(parser, ArgParseOption("be", "barcode-error-rate", "Error rate threshold for mismatches and gaps.", ARG::DOUBLE));
addOption(parser, ArgParseOption("bt", "barcode-trim-end", "Type of detection, see section trim-end modes.", ARG::STRING));
addOption(parser, ArgParseOption("bn", "barcode-tail-length", "Region size in tail trim-end modes. Default: barcode length.", ARG::INTEGER));
addOption(parser, ArgParseOption("bk", "barcode-keep", "Keep barcodes within reads instead of removal."));
addOption(parser, ArgParseOption("bu", "barcode-unassigned", "Include unassigned reads in output generation."));
addOption(parser, ArgParseOption("bm", "barcode-match", "Alignment match score.", ARG::INTEGER));
addOption(parser, ArgParseOption("bi", "barcode-mismatch", "Alignment mismatch score.", ARG::INTEGER));
addOption(parser, ArgParseOption("bg", "barcode-gap", "Alignment gap score.", ARG::INTEGER));
addSection(parser, "Adapter removal");
addOption(parser, ArgParseOption("a", "adapters", "Fasta file with adapters for removal that may contain N.", ARG::INPUT_FILE));
addOption(parser, ArgParseOption("a2", "adapters2", "File with extra adapters for second read set in paired mode.", ARG::INPUT_FILE));
addOption(parser, ArgParseOption("as", "adapter-seq", "Single adapter sequence as alternative to adapters option.", ARG::STRING));
// The preset option has an empty help text; its valid values are set below.
addOption(parser, ArgParseOption("aa", "adapter-preset", "", ARG::STRING));
addOption(parser, ArgParseOption("ao", "adapter-min-overlap", "Minimum overlap for removal without pair overlap.", ARG::INTEGER));
addOption(parser, ArgParseOption("ae", "adapter-error-rate", "Error rate threshold for mismatches and gaps.", ARG::DOUBLE));
addOption(parser, ArgParseOption("at", "adapter-trim-end", "Type of removal, see section trim-end modes.", ARG::STRING));
addOption(parser, ArgParseOption("an", "adapter-tail-length", "Region size for tail trim-end modes. Default: adapter length.", ARG::INTEGER));
// addOption(parser, ArgParseOption("ah", "adapter-overhang", "Overhang at read ends in right and left modes.", ARG::INTEGER));
addOption(parser, ArgParseOption("ax", "adapter-relaxed", "Skip restriction to pass read ends in right and left modes."));
addOption(parser, ArgParseOption("ap", "adapter-pair-overlap", "Overlap detection of paired reads.", ARG::STRING));
addOption(parser, ArgParseOption("av", "adapter-min-poverlap", "Minimum overlap of paired reads for detection.", ARG::INTEGER));
addOption(parser, ArgParseOption("ac", "adapter-revcomp", "Include reverse complements of adapters.", ARG::STRING));
addOption(parser, ArgParseOption("ad", "adapter-revcomp-end", "Use different trim-end for reverse complements of adapters.", ARG::STRING));
addOption(parser, ArgParseOption("ab", "adapter-add-barcode", "Add reverse complement of detected barcode to adapters."));
addOption(parser, ArgParseOption("ar", "adapter-read-set", "Consider only single read set for adapters.", ARG::STRING));
addOption(parser, ArgParseOption("ak", "adapter-trimmed-out", "Modify that trimmed reads are kept.", ARG::STRING));
addOption(parser, ArgParseOption("ay", "adapter-cycles", "Number of adapter removal cycles.", ARG::INTEGER));
addOption(parser, ArgParseOption("am", "adapter-match", "Alignment match score.", ARG::INTEGER));
addOption(parser, ArgParseOption("ai", "adapter-mismatch", "Alignment mismatch score.", ARG::INTEGER));
addOption(parser, ArgParseOption("ag", "adapter-gap", "Alignment gap score.", ARG::INTEGER));
// addSection(parser, "Joining paired reads");
// addOption(parser, ArgParseOption("j", "join", "Align paired reads and join them in case of sufficient overlap."));
// addOption(parser, ArgParseOption("jo", "join-min-overlap", "Minimum overlap of adapter and read sequence.", ARG::INTEGER));
// addOption(parser, ArgParseOption("jt", "join-error-rate", "Error rate threshold for mismatches and gaps.", ARG::DOUBLE));
// addOption(parser, ArgParseOption("jm", "join-match", "Alignment match score.", ARG::INTEGER));
// addOption(parser, ArgParseOption("ji", "join-mismatch", "Alignment mismatch score.", ARG::INTEGER));
// addOption(parser, ArgParseOption("jg", "join-gap", "Alignment gap score.", ARG::INTEGER));
addSection(parser, "Filtering and trimming");
addOption(parser, ArgParseOption("u", "max-uncalled", "Allowed uncalled bases N for each read.", ARG::INTEGER));
addOption(parser, ArgParseOption("x", "pre-trim-left", "Trim given number of bases on 5' read end before detection.", ARG::INTEGER));
addOption(parser, ArgParseOption("y", "pre-trim-right", "Trim specified number of bases on 3' end prior to detection.", ARG::INTEGER));
addOption(parser, ArgParseOption("k", "post-trim-length", "Trim to specified read length from 3' end after removal.", ARG::INTEGER));
addOption(parser, ArgParseOption("m", "min-read-length", "Minimum read length to remain after removal.", ARG::INTEGER));
addSection(parser, "Quality-based trimming");
addOption(parser, ArgParseOption("q", "qtrim", "Quality-based trimming mode.", ARG::STRING));
addOption(parser, ArgParseOption("qf", "qtrim-format", "Quality format.", ARG::STRING));
addOption(parser, ArgParseOption("qt", "qtrim-threshold", "Minimum quality as threshold for trimming.", ARG::INTEGER));
addOption(parser, ArgParseOption("qw", "qtrim-win-size", "Region size for sliding window approach.", ARG::INTEGER));
addOption(parser, ArgParseOption("qa", "qtrim-post-removal", "Perform quality-based trimming after removal steps."));
addSection(parser, "Trimming of homopolymers");
addOption(parser, ArgParseOption("hl", "htrim-left", "Trim specific homopolymers on left read end after removal.", ARG::STRING));
addOption(parser, ArgParseOption("hr", "htrim-right", "Trim certain homopolymers on right read end after removal.", ARG::STRING));
addOption(parser, ArgParseOption("hi", "htrim-min-length", "Minimum length of homopolymers at read ends.", ARG::INTEGER));
addOption(parser, ArgParseOption("h2", "htrim-min-length2", "Minimum length for homopolymers specified after first one.", ARG::INTEGER));
addOption(parser, ArgParseOption("hx", "htrim-max-length", "Maximum length of homopolymers on left and right read end.", ARG::INTEGER));
addOption(parser, ArgParseOption("hf", "htrim-max-first", "Apply maximum length of homopolymers only for first one."));
addOption(parser, ArgParseOption("he", "htrim-error-rate", "Error rate threshold for mismatches.", ARG::DOUBLE));
addOption(parser, ArgParseOption("ha", "htrim-adapter", "Trim only in case of adapter removal on same side."));
addSection(parser, "Output selection");
addOption(parser, ArgParseOption("f", "fasta-output", "Prefer non-quality format fasta for output."));
addOption(parser, ArgParseOption("z", "zip-output", "Direct compression of output files.", ARG::STRING));
addOption(parser, ArgParseOption("1", "stdout-reads", "Write reads to stdout, tagged and interleaved if needed."));
addOption(parser, ArgParseOption("R", "output-reads", "Output file for reads instead of target prefix usage.", ARG::OUTPUT_FILE));
addOption(parser, ArgParseOption("P", "output-reads2", "Output file for reads2 instead of target prefix usage.", ARG::OUTPUT_FILE));
addOption(parser, ArgParseOption("j", "length-dist", "Generate length distribution for read output files."));
addOption(parser, ArgParseOption("s", "single-reads", "Write single reads for too short counterparts in pairs."));
addOption(parser, ArgParseOption("S", "single-reads-paired", "Write paired single reads with N for short counterparts."));
addSection(parser, "Logging and tagging");
addOption(parser, ArgParseOption("l", "align-log", "Print chosen read alignments.", ARG::STRING));
addOption(parser, ArgParseOption("o", "stdout-log", "Write statistics to stdout instead of target log file."));
addOption(parser, ArgParseOption("O", "output-log", "Output file for logging instead of target prefix usage.", ARG::OUTPUT_FILE));
addOption(parser, ArgParseOption("g", "removal-tags", "Tag reads that are subject to adapter or barcode removal."));
addOption(parser, ArgParseOption("e", "number-tags", "Replace read tags by ascending number to save space."));
addOption(parser, ArgParseOption("d", "umi-tags", "Capture UMIs in reads at barcode or adapter N positions."));
// NOTE(review): "version" here and "version-check" below are not added in
// this function; presumably they are built-in options of the parser - confirm.
hideOption(parser, "version");
// Options flagged as advanced.
setAdvanced(parser, "barcodes2");
setAdvanced(parser, "barcode-tail-length");
setAdvanced(parser, "barcode-keep");
setAdvanced(parser, "barcode-unassigned");
setAdvanced(parser, "barcode-match");
setAdvanced(parser, "barcode-mismatch");
setAdvanced(parser, "barcode-gap");
setAdvanced(parser, "adapter-seq");
setAdvanced(parser, "adapter-tail-length");
setAdvanced(parser, "adapter-relaxed");
setAdvanced(parser, "adapter-min-poverlap");
setAdvanced(parser, "adapter-revcomp");
setAdvanced(parser, "adapter-revcomp-end");
setAdvanced(parser, "adapter-add-barcode");
setAdvanced(parser, "adapter-trimmed-out");
setAdvanced(parser, "adapter-read-set");
setAdvanced(parser, "adapter-cycles");
setAdvanced(parser, "adapter-match");
setAdvanced(parser, "adapter-mismatch");
setAdvanced(parser, "adapter-gap");
// setAdvanced(parser, "adapter-overhang");
setAdvanced(parser, "post-trim-length");
setAdvanced(parser, "qtrim-win-size");
setAdvanced(parser, "qtrim-post-removal");
setAdvanced(parser, "htrim-left");
setAdvanced(parser, "htrim-min-length2");
setAdvanced(parser, "htrim-max-length");
setAdvanced(parser, "htrim-max-first");
setAdvanced(parser, "htrim-adapter");
setAdvanced(parser, "version-check");
setAdvanced(parser, "man-help");
setAdvanced(parser, "bundle");
setAdvanced(parser, "bundles");
setAdvanced(parser, "interleaved");
setAdvanced(parser, "iupac");
setAdvanced(parser, "length-dist");
setAdvanced(parser, "single-reads");
setAdvanced(parser, "single-reads-paired");
setAdvanced(parser, "output-reads");
setAdvanced(parser, "output-reads2");
setAdvanced(parser, "output-log");
setAdvanced(parser, "number-tags");
setAdvanced(parser, "umi-tags");
setCategory(parser, "Barcode and adapter removal");
// setRequired(parser, "reads");
// setMinValue(parser, "threads", "1");
// setValidValues(parser, "target", "fasta fa fastq fq");
// setValidValues(parser, "reads", "fasta fa fastq fq");
// setValidValues(parser, "reads2", "fasta fa fastq fq");
// setValidValues(parser, "barcode-reads", "fasta fa fastq fq");
// setValidValues(parser, "barcodes", "fasta fa");
// setValidValues(parser, "barcodes2", "fasta fa");
// setValidValues(parser, "adapters", "fasta fa");
// setValidValues(parser, "adapters2", "fasta fa");
// setValidValues(parser, "adapter-trim-end", "ANY LEFT RIGHT LTAIL RTAIL");
// setMinValue(parser, "adapter-tail-length", "1");
// setMinValue(parser, "adapter-min-overlap", "1");
// setMinValue(parser, "adapter-error-rate", "0");
// setMaxValue(parser, "adapter-error-rate", "1");
// setValidValues(parser, "barcode-trim-end", "ANY LEFT RIGHT LTAIL RTAIL");
// setMinValue(parser, "barcode-tail-length", "1");
// setMinValue(parser, "barcode-min-overlap", "1");
// setMinValue(parser, "barcode-error-rate", "0");
// setMaxValue(parser, "barcode-error-rate", "1");
// setMinValue(parser, "max-uncalled", "0");
// setMinValue(parser, "pre-trim-left", "1");
// setMinValue(parser, "pre-trim-right", "1");
// setMinValue(parser, "post-trim-length", "1");
// setMinValue(parser, "min-read-length", "1");
// setMinValue(parser, "qtrim-threshold", "0");
// Accepted values of the string-valued mode options.
setValidValues(parser, "qtrim", "TAIL WIN BWA");
setValidValues(parser, "qtrim-format", "sanger solexa i1.3 i1.5 i1.8");
setValidValues(parser, "align-log", "ALL MOD TAB");
setValidValues(parser, "zip-output", "GZ BZ2");
setValidValues(parser, "adapter-read-set", "1 2");
setValidValues(parser, "adapter-revcomp", "ON ONLY");
setValidValues(parser, "adapter-trimmed-out", "OFF ONLY");
setValidValues(parser, "adapter-pair-overlap", "ON SHORT ONLY");
setValidValues(parser, "adapter-preset", "TruSeq SmallRNA Methyl Ribo Nextera NexteraMP");
// Default values.
// setDefaultValue(parser, "version-check", "OFF");
setDefaultValue(parser, "target", "flexbarOut");
setDefaultValue(parser, "threads", "1");
setDefaultValue(parser, "bundle", "256");
setDefaultValue(parser, "max-uncalled", "0");
setDefaultValue(parser, "min-read-length", "18");
setDefaultValue(parser, "barcode-trim-end", "LTAIL");
setDefaultValue(parser, "barcode-error-rate", "0.0");
setDefaultValue(parser, "barcode-match", "1");
setDefaultValue(parser, "barcode-mismatch", "-1");
setDefaultValue(parser, "barcode-gap", "-9");
setDefaultValue(parser, "adapter-trim-end", "RIGHT");
setDefaultValue(parser, "adapter-min-overlap", "3");
setDefaultValue(parser, "adapter-error-rate", "0.1");
setDefaultValue(parser, "adapter-min-poverlap", "40");
setDefaultValue(parser, "adapter-cycles", "1");
setDefaultValue(parser, "adapter-match", "1");
setDefaultValue(parser, "adapter-mismatch", "-1");
setDefaultValue(parser, "adapter-gap", "-6");
// setDefaultValue(parser, "adapter-overhang", "0");
setDefaultValue(parser, "qtrim-threshold", "20");
setDefaultValue(parser, "qtrim-win-size", "5");
setDefaultValue(parser, "htrim-min-length", "3");
setDefaultValue(parser, "htrim-error-rate", "0.1");
// Additional help sections.
addTextSection(parser, "TRIM-END MODES");
addText(parser._toolDoc, "\\fBANY:\\fP longer side of read remains after removal of overlap", false);
addText(parser._toolDoc, "\\fBLEFT:\\fP right side remains after removal, align <= read end", false);
addText(parser._toolDoc, "\\fBRIGHT:\\fP left part remains after removal, align >= read start", false);
addText(parser._toolDoc, "\\fBLTAIL:\\fP consider first n bases of reads in alignment", false);
addText(parser._toolDoc, "\\fBRTAIL:\\fP use only last n bases, see tail-length options", false);
addTextSection(parser, "EXAMPLES");
addText(parser._toolDoc, "\\fBflexbar\\fP \\fB-r\\fP reads.fq \\fB-t\\fP target \\fB-q\\fP TAIL \\fB-qf\\fP i1.8", false);
addText(parser._toolDoc, "\\fBflexbar\\fP \\fB-r\\fP reads.fq \\fB-b\\fP barcodes.fa \\fB-bt\\fP LTAIL", false);
addText(parser._toolDoc, "\\fBflexbar\\fP \\fB-r\\fP reads.fq \\fB-a\\fP adapters.fa \\fB-ao\\fP 3 \\fB-ae\\fP 0.1", false);
addText(parser._toolDoc, "\\fBflexbar\\fP \\fB-r\\fP r1.fq \\fB-p\\fP r2.fq \\fB-a\\fP a1.fa \\fB-a2\\fP a2.fa \\fB-ap\\fP ON", false);
addText(parser._toolDoc, "\\fBflexbar\\fP \\fB-r\\fP r1.fq \\fB-p\\fP r2.fq \\fB-aa\\fP TruSeq \\fB-ap\\fP ON");
}
// Writes the current local time to the log stream of the options. The text
// produced by asctime() is followed by one additional line break.
void printLocalTime(Options &o){
	time_t now;
	time(&now);
	const char *stamp = asctime(localtime(&now));
	*o.out << "Local time: " << stamp << "\n";
}
// Parses the command line and handles every case that ends the program
// early: parse errors, help and version output, --versions, --cite,
// --man-help and a missing reads option.
//   parser   parser configured by defineOptions()
//   version  version string shown in the banner of --cite
//   argc     number of command line arguments
//   argv     command line arguments
// Returns normally only when parsing succeeded and reads are given.
void parseCmdLine(seqan::ArgumentParser &parser, std::string version, int argc, char const ** argv){
	using namespace std;
	using seqan::ArgumentParser;

	// Pre-scan of the raw arguments to decide whether a leading blank line
	// belongs on stdout. Short options are compared exactly: the previous
	// prefix test on "-h" also matched the htrim options (-hl, -hr, -hi, -h2,
	// -hx, -hf, -he, -ha) and printed a stray blank line, which ended up in
	// the read stream when combined with -1.
	bool logToStdout   = false;
	bool readsToStdout = false;
	bool helpOrVersion = false;

	for(int i = 0; i < argc; i++){
		const char *arg = argv[i];

		if(strcmp(arg, "-o") == 0 || strncmp(arg, "--stdout-log", 12) == 0)
			logToStdout = true;

		if(strcmp(arg, "-1") == 0 || strncmp(arg, "--stdout-reads", 14) == 0)
			readsToStdout = true;

		if(strcmp(arg, "-h")  == 0 ||
		   strcmp(arg, "-hh") == 0 ||
		   strcmp(arg, "-hm") == 0 ||
		   strncmp(arg, "--help", 6) == 0 ||
		   strncmp(arg, "--full-help", 11) == 0 ||
		   strncmp(arg, "--version", 9) == 0 )
			helpOrVersion = true;
	}

	// Same precedence as before: help/version overrides reads on stdout,
	// which in turn overrides log on stdout.
	const bool useLogFile = ! helpOrVersion && (readsToStdout || ! logToStdout);

	if(! useLogFile) cout << endl;

	ArgumentParser::ParseResult res = parse(parser, argc, argv);

	if(res != ArgumentParser::PARSE_OK){
		if(! isSet(parser, "version")){
			cout << endl << getFlexbarURL() << endl;
			if(isSet(parser, "help")){
				cout << "Show advanced options: flexbar -hh\n" << endl;
			}
		}
		else cout << endl;

		// Exit status 1 for parse errors, 0 for help and version output.
		exit(res == ArgumentParser::PARSE_ERROR);
	}

	if(isSet(parser, "versions")){
		cout << endl;
		printVersion(parser, cout);
		cout << endl;
		exit(0);
	}

	if(isSet(parser, "cite")){
		cout << endl;
		cout << getFlexbarBanner(version) << endl;
		cout << getFlexbarCitation() << endl;
		cout << getFlexbarURL() << endl;
		exit(0);
	}

	if(isSet(parser, "man-help")){
		printHelp(parser, cout, "man", true);
		cout << endl;
		exit(0);
	}

	// The reads option is mandatory for an actual run.
	if(! isSet(parser, "reads")){
		cout << endl;
		printShortHelp(parser);
		cout << endl << getFlexbarURL();
		cerr << "\nPlease specify reads input file.\n" << endl;
		exit(1);
	}
}
// Selects the log destination and determines the format of the reads file.
// The log goes to stdout only when stdout-log is set and reads are not
// written to stdout; otherwise it goes to "<target>.log" or to the file
// given by output-log. Finally the reads file name is stored and its format
// is detected via checkInputType().
void initOptions(Options &o, seqan::ArgumentParser &parser){
	using namespace std;
	using namespace flexbar;

	const bool readsToStdout = isSet(parser, "stdout-reads");
	const bool logToStdout   = isSet(parser, "stdout-log");

	if(readsToStdout) o.useStdout = true;

	if(readsToStdout || ! logToStdout){
		// Log file: target prefix plus ".log", unless output-log overrides it.
		string logPath;
		getOptionValue(logPath, parser, "target");
		logPath = logPath + ".log";

		if(isSet(parser, "output-log") && ! o.logStdout){
			getOptionValue(o.outLogFile, parser, "output-log");
			logPath = o.outLogFile;
		}
		openOutputFile(o.fstrmOut, logPath);

		o.out = &o.fstrmOut;
		*o.out << endl;
	}
	else{
		o.logStdout = true;
		o.out = &cout;
	}

	getOptionValue(o.readsFile, parser, "reads");
	checkInputType(o.readsFile, o.format, true);
}
// Transfers the parsed command line values from parser into o, checks them
// and writes a summary of the settings to the log stream o.out.
//
// Most invalid values or option combinations print a message to cerr and
// terminate the program with exit(1). The only exception visible here is a
// post-trim-length below min-read-length, which is reset to 0 with a warning.
//
// o       options object to fill, o.out and o.format have to be set before
// parser  argument parser that already holds the parsed command line
void loadOptions(Options &o, seqan::ArgumentParser &parser){
using namespace std;
using namespace flexbar;
ostream *out = o.out;
*out << getFlexbarBanner(getVersion(parser)) << endl;
*out << getFlexbarURL() << endl << endl;
printLocalTime(o);
// basic options
getOptionValue(o.nThreads, parser, "threads");
*out << "Number of threads: " << o.nThreads << endl;
if(o.nThreads < 1){
cerr << "\n" << "Number of threads should be 1 at least.\n" << endl;
exit(1);
}
getOptionValue(o.bundleSize, parser, "bundle");
*out << "Bundled fragments: " << o.bundleSize << endl;
if(o.bundleSize < 1){
cerr << "\n" << "Bundle size should be 1 at least.\n" << endl;
exit(1);
}
if(isSet(parser, "bundles")){
getOptionValue(o.nBundles, parser, "bundles");
*out << "Number of bundles: " << o.nBundles << endl << endl;
if(o.nBundles < 1){
cerr << "\n" << "Number of bundles should be 1 at least.\n" << endl;
exit(1);
}
}
else *out << endl;
getOptionValue(o.targetName, parser, "target");
*out << "Target name: " << o.targetName << endl;
*out << "File type: ";
if(o.format == FASTA) *out << "fasta";
else if(o.format == FASTQ) *out << "fastq";
*out << endl;
getOptionValue(o.readsFile, parser, "reads");
*out << "Reads file: ";
// a reads file name of "-" selects standard input
if(o.readsFile == "-"){
*out << "stdin" << endl;
o.useStdin = true;
}
else *out << o.readsFile << endl;
// run type starts as SINGLE and is upgraded by the paired and barcode options below
o.runType = SINGLE;
if(isSet(parser, "interleaved")){
*out << "Interleaved reads: on (paired run)" << endl;
o.runType = PAIRED;
o.isPaired = true;
o.interleavedInput = true;
}
if(isSet(parser, "reads2")){
if(o.interleavedInput){
cerr << "\n" << "Please specify either interleaved reads or second reads file.\n" << endl;
exit(1);
}
getOptionValue(o.readsFile2, parser, "reads2");
*out << "Reads file 2: " << o.readsFile2 << " (paired run)" << endl;
o.runType = PAIRED;
o.isPaired = true;
FileFormat fformat;
checkInputType(o.readsFile2, fformat, false);
if(o.format != fformat){
cerr << "\n" << "First and second reads file do not have same format.\n" << endl;
exit(1);
}
}
if(isSet(parser, "iupac")){
*out << "Iupac reads: on" << endl;
o.iupacInput = true;
}
// barcode and adapter file options
if(isSet(parser, "barcodes")){
// barcodes are detected either in a separate barcode read or within the reads
if(isSet(parser, "barcode-reads")){
getOptionValue(o.barReadsFile, parser, "barcode-reads");
*out << "Barcode reads file: " << o.barReadsFile << endl;
FileFormat fformat;
checkInputType(o.barReadsFile, fformat, false);
if(o.format != fformat){
cerr << "\n" << "Barcode reads file does not have same format as reads.\n" << endl;
exit(1);
}
o.barDetect = BARCODE_READ;
}
else o.barDetect = WITHIN_READ_REMOVAL;
getOptionValue(o.barcodeFile, parser, "barcodes");
*out << "Barcode file: " << o.barcodeFile << endl;
if(o.runType == SINGLE) o.runType = SINGLE_BARCODED;
else if(o.runType == PAIRED) o.runType = PAIRED_BARCODED;
if(o.barDetect == WITHIN_READ_REMOVAL && isSet(parser, "barcode-keep")){
o.barDetect = WITHIN_READ;
}
// a second barcode set is only used for paired runs without separate barcode read
if(isSet(parser, "barcodes2") && o.barDetect != BARCODE_READ && o.isPaired){
getOptionValue(o.barcode2File, parser, "barcodes2");
*out << "Barcode file 2: " << o.barcode2File << endl;
if(o.barDetect == WITHIN_READ_REMOVAL) o.barDetect = WITHIN_READ_REMOVAL2;
else if(o.barDetect == WITHIN_READ) o.barDetect = WITHIN_READ2;
}
}
// an adapter file takes precedence over a single adapter sequence
if(isSet(parser, "adapters")){
getOptionValue(o.adapterFile, parser, "adapters");
*out << "Adapter file: " << o.adapterFile << endl;
o.adapRm = NORMAL;
o.useAdapterFile = true;
}
else if(isSet(parser, "adapter-seq")){
getOptionValue(o.adapterSeq, parser, "adapter-seq");
o.adapRm = NORMAL;
}
if(isSet(parser, "adapters2") && o.adapRm == NORMAL && o.isPaired){
getOptionValue(o.adapter2File, parser, "adapters2");
*out << "Adapter file 2: " << o.adapter2File << endl;
o.adapRm = NORMAL2;
}
// presets cannot be combined with custom adapters set above
if(isSet(parser, "adapter-preset")){
if(o.adapRm == NORMAL || o.adapRm == NORMAL2){
cerr << "\n" << "Please specify either adapter preset or custom adapters.\n" << endl;
exit(1);
}
string aa;
getOptionValue(aa, parser, "adapter-preset");
if(aa == "TruSeq") o.aPreset = TRUSEQ;
else if(aa == "SmallRNA") o.aPreset = SMALLRNA;
else if(aa == "Methyl") o.aPreset = METHYL;
else if(aa == "Ribo") o.aPreset = RIBO;
else if(aa == "Nextera") o.aPreset = NEXTERA;
else if(aa == "NexteraMP") o.aPreset = NEXTERAMP;
*out << "Adapter preset: " << aa << endl;
o.adapRm = NORMAL;
// TruSeq, Methyl and NexteraMP switch to NORMAL2 for paired runs
if(o.isPaired && (o.aPreset == TRUSEQ || o.aPreset == METHYL || o.aPreset == NEXTERAMP)) o.adapRm = NORMAL2;
if(! o.isPaired && o.aPreset == NEXTERAMP){
cerr << "\n" << "Please provide paired reads for preset NexteraMP.\n" << endl;
exit(1);
}
}
*out << endl;
// filtering and trimming options
getOptionValue(o.maxUncalled, parser, "max-uncalled");
*out << "max-uncalled: " << o.maxUncalled << endl;
if(isSet(parser, "pre-trim-left")){
getOptionValue(o.cutLen_begin, parser, "pre-trim-left");
*out << "pre-trim-left: " << o.cutLen_begin << endl;
}
if(isSet(parser, "pre-trim-right")){
getOptionValue(o.cutLen_end, parser, "pre-trim-right");
*out << "pre-trim-right: " << o.cutLen_end << endl;
}
if(isSet(parser, "post-trim-length")){
getOptionValue(o.cutLen_read, parser, "post-trim-length");
*out << "post-trim-length: " << o.cutLen_read << endl;
}
getOptionValue(o.min_readLen, parser, "min-read-length");
*out << "min-read-length: " << o.min_readLen << endl;
if(o.min_readLen < 1){
cerr << "\n" << "Minimum read length should be 1 or higher.\n" << endl;
exit(1);
}
// not fatal: the post-trim-length is dropped and the run continues
if(o.cutLen_read != 0 && o.cutLen_read < o.min_readLen){
o.cutLen_read = 0;
cerr << "\nOption post-trim-length omitted, as it is shorter than min read length.\n" << endl;
}
// quality-based trimming
// qtrim is silently ignored unless the reads are in fastq format
if(isSet(parser, "qtrim") && o.format == FASTQ){
string qt;
getOptionValue(qt, parser, "qtrim");
if(qt == "TAIL") o.qTrim = TAIL;
else if(qt == "WIN") o.qTrim = WIN;
else if(qt == "BWA") o.qTrim = BWA;
*out << "qtrim: " << qt << endl;
if(isSet(parser, "qtrim-format")){
string quality;
getOptionValue(quality, parser, "qtrim-format");
// i1.3 and i1.5 share ILLUMINA, i1.8 is handled like sanger
if(quality == "sanger") o.qual = SANGER;
else if(quality == "solexa") o.qual = SOLEXA;
else if(quality == "i1.3") o.qual = ILLUMINA;
else if(quality == "i1.5") o.qual = ILLUMINA;
else if(quality == "i1.8") o.qual = SANGER;
*out << "qtrim-format: " << quality << endl;
}
else{
cerr << "\n" << "Specify qtrim-format for quality-based trimming.\n" << endl;
exit(1);
}
getOptionValue(o.qtrimThresh, parser, "qtrim-threshold");
if(o.qtrimThresh > 0){
*out << "qtrim-threshold: " << o.qtrimThresh;
// shift the threshold by the format-specific offset, the shifted value is logged in parentheses
switch(o.qual){
case SANGER: o.qtrimThresh += 33;
break;
case SOLEXA: o.qtrimThresh += 59;
break;
case ILLUMINA: o.qtrimThresh += 64;
}
*out << " (" << o.qtrimThresh << ")" << endl;
}
if(o.qTrim == WIN || o.qTrim == WINTAIL){
// if(isSet(parser, "qtrim-win-mean")){
// getOptionValue(o.qtrimWinMean, parser, "qtrim-win-mean");
// *out << "qtrim-win-mean: " << o.qtrimWinMean << endl;
// }
getOptionValue(o.qtrimWinSize, parser, "qtrim-win-size");
*out << "qtrim-win-size: " << o.qtrimWinSize << endl;
}
if(isSet(parser, "qtrim-post-removal")) o.qtrimPostRm = true;
}
// trimming of homopolymers
if(isSet(parser, "htrim-left") || isSet(parser, "htrim-right")){
if(isSet(parser, "htrim-left")){
getOptionValue(o.htrimLeft, parser, "htrim-left");
*out << "htrim-left: " << o.htrimLeft << endl;
}
if(isSet(parser, "htrim-right")){
getOptionValue(o.htrimRight, parser, "htrim-right");
*out << "htrim-right: " << o.htrimRight << endl;
}
getOptionValue(o.htrimMinLength, parser, "htrim-min-length");
*out << "htrim-min-length: " << o.htrimMinLength << endl;
if(isSet(parser, "htrim-min-length2")){
getOptionValue(o.htrimMinLength2, parser, "htrim-min-length2");
*out << "htrim-min-length2: " << o.htrimMinLength2 << endl;
}
// htrim-max-first is only evaluated together with htrim-max-length
if(isSet(parser, "htrim-max-length")){
getOptionValue(o.htrimMaxLength, parser, "htrim-max-length");
*out << "htrim-max-length: " << o.htrimMaxLength << endl;
if(isSet(parser, "htrim-max-first")){
*out << "htrim-max-first: on" << endl;
o.htrimMaxFirstOnly = true;
}
}
getOptionValue(o.h_errorRate, parser, "htrim-error-rate");
*out << "htrim-error-rate: " << o.h_errorRate << endl;
if(isSet(parser, "htrim-adapter")){
*out << "htrim-adapter: on" << endl;
o.htrimAdapterRm = true;
}
}
// output, logging and tagging options
if(isSet(parser, "align-log")){
getOptionValue(o.logAlignStr, parser, "align-log");
if(o.logAlignStr == "ALL") o.logAlign = ALL;
else if(o.logAlignStr == "TAB") o.logAlign = TAB;
else if(o.logAlignStr == "MOD") o.logAlign = MOD;
}
if(isSet(parser, "zip-output")){
getOptionValue(o.outCompression, parser, "zip-output");
// o.outCompression is overwritten with the matching file name suffix
if(o.outCompression == "GZ"){
o.cmprsType = GZ;
o.outCompression = ".gz";
}
else if(o.outCompression == "BZ2"){
o.cmprsType = BZ2;
o.outCompression = ".bz2";
}
}
if(isSet(parser, "single-reads")) o.writeSingleReads = true;
// single-reads-paired overrides single-reads
if(isSet(parser, "single-reads-paired")){
o.writeSingleReadsP = true;
o.writeSingleReads = false;
}
// explicit output file names are only used for non-barcoded runs that write neither to stdout nor single reads
if(! o.useStdout && ! o.writeSingleReads && (o.runType == SINGLE || o.runType == PAIRED)){
if(isSet(parser, "output-reads") && (isSet(parser, "output-reads2") || o.runType == SINGLE)){
getOptionValue(o.outReadsFile, parser, "output-reads");
}
if(isSet(parser, "output-reads2") && isSet(parser, "output-reads") && o.runType == PAIRED){
getOptionValue(o.outReadsFile2, parser, "output-reads2");
if(o.outReadsFile == o.outReadsFile2){
cerr << "\n" << "Output reads and reads2 file should not be the same.\n" << endl;
exit(1);
}
}
if(o.outLogFile != "" && (o.outLogFile == o.outReadsFile || o.outLogFile == o.outReadsFile2)){
cerr << "\n" << "Output log file should not be the same as output reads or reads2 file.\n" << endl;
exit(1);
}
}
if(isSet(parser, "fasta-output")) o.switch2Fasta = true;
if(isSet(parser, "length-dist")) o.writeLengthDist = true;
if(isSet(parser, "number-tags")) o.useNumberTag = true;
if(isSet(parser, "removal-tags")) o.useRemovalTag = true;
if(isSet(parser, "umi-tags")) o.umiTags = true;
*out << endl;
// barcode options
if(o.barDetect != BOFF){
string b_trim_end;
getOptionValue(b_trim_end, parser, "barcode-trim-end");
if(b_trim_end == "LEFT") o.b_end = LEFT;
else if(b_trim_end == "RIGHT") o.b_end = RIGHT;
else if(b_trim_end == "ANY") o.b_end = ANY;
else if(b_trim_end == "LTAIL") o.b_end = LTAIL;
else if(b_trim_end == "RTAIL") o.b_end = RTAIL;
else{
cerr << "\nSpecified barcode trim-end is unknown.\n" << endl;
exit(1);
}
*out << "barcode-trim-end: " << b_trim_end << endl;
if(isSet(parser, "barcode-tail-length")){
getOptionValue(o.b_tail_len, parser, "barcode-tail-length");
*out << "barcode-tail-length: " << o.b_tail_len << endl;
}
if(isSet(parser, "barcode-min-overlap")){
getOptionValue(o.b_min_overlap, parser, "barcode-min-overlap");
*out << "barcode-min-overlap: " << o.b_min_overlap << endl;
if(o.b_min_overlap < 1){
cerr << "\nBarcode min-overlap should be 1 at least.\n" << endl;
exit(1);
}
}
getOptionValue(o.b_errorRate, parser, "barcode-error-rate");
*out << "barcode-error-rate: " << o.b_errorRate << endl;
// accepted range is 0 <= rate < 1
if(o.b_errorRate < 0 || o.b_errorRate >= 1){
cerr << "\nBarcode error rate should be between 0 and 1.\n" << endl;
exit(1);
}
if(isSet(parser, "barcode-unassigned")) o.writeUnassigned = true;
getOptionValue(o.b_match, parser, "barcode-match");
getOptionValue(o.b_mismatch, parser, "barcode-mismatch");
getOptionValue(o.b_gapCost, parser, "barcode-gap");
// non-negative scores get a leading blank in place of the minus sign
*out << "barcode-match: ";
if(o.b_match >= 0) *out << " ";
*out << o.b_match << endl;
*out << "barcode-mismatch: ";
if(o.b_mismatch >= 0) *out << " ";
*out << o.b_mismatch << endl;
*out << "barcode-gap: ";
if(o.b_gapCost >= 0) *out << " ";
*out << o.b_gapCost << "\n" << endl;
}
// adapter options
if(o.isPaired && isSet(parser, "adapter-pair-overlap")){
string pOverlap;
getOptionValue(pOverlap, parser, "adapter-pair-overlap");
if (pOverlap == "ON") o.poMode = PON;
else if(pOverlap == "SHORT") o.poMode = PSHORT;
else if(pOverlap == "ONLY") o.poMode = PONLY;
else {
cerr << "\nSpecified pair overlap mode is unknown.\n" << endl;
exit(1);
}
// modes ON and SHORT are switched off again if adapter removal is off, ONLY stays active
if(o.adapRm == AOFF && (o.poMode == PON || o.poMode == PSHORT)) o.poMode = POFF;
else *out << "adapter-pair-overlap: " << pOverlap << endl;
}
if(o.adapRm != AOFF || o.poMode == PONLY){
if(o.adapRm != AOFF){
string a_trim_end;
getOptionValue(a_trim_end, parser, "adapter-trim-end");
if (a_trim_end == "LEFT") o.a_end = LEFT;
else if(a_trim_end == "RIGHT") o.a_end = RIGHT;
else if(a_trim_end == "ANY") o.a_end = ANY;
else if(a_trim_end == "LTAIL") o.a_end = LTAIL;
else if(a_trim_end == "RTAIL") o.a_end = RTAIL;
else {
cerr << "\nSpecified adapter trim-end is unknown.\n" << endl;
exit(1);
}
*out << "adapter-trim-end: " << a_trim_end << endl;
if(o.aPreset != APOFF && o.a_end != RIGHT){
cerr << "\nAdapter trim-end should be RIGHT for adapter presets.\n" << endl;
exit(1);
}
if(isSet(parser, "adapter-tail-length")){
getOptionValue(o.a_tail_len, parser, "adapter-tail-length");
*out << "adapter-tail-length: " << o.a_tail_len << endl;
}
if(isSet(parser, "adapter-revcomp")){
string rcModeStr;
getOptionValue(rcModeStr, parser, "adapter-revcomp");
*out << "adapter-revcomp: " << rcModeStr << endl;
if (rcModeStr == "ON") o.rcMode = RCON;
else if(rcModeStr == "ONLY") o.rcMode = RCONLY;
// a separate trim-end for reverse complements is only considered in mode ON
if(isSet(parser, "adapter-revcomp-end") && o.rcMode == RCON){
string arc_trim_end;
getOptionValue(arc_trim_end, parser, "adapter-revcomp-end");
if (arc_trim_end == "LEFT") o.arc_end = LEFT;
else if(arc_trim_end == "RIGHT") o.arc_end = RIGHT;
else if(arc_trim_end == "ANY") o.arc_end = ANY;
else if(arc_trim_end == "LTAIL") o.arc_end = LTAIL;
else if(arc_trim_end == "RTAIL") o.arc_end = RTAIL;
else {
cerr << "\nSpecified reverse complement adapter trim-end is unknown.\n" << endl;
exit(1);
}
// only enabled if it differs from the regular adapter trim-end
if(o.arc_end != o.a_end){
*out << "adapter-revcomp-end: " << arc_trim_end << endl;
o.useRcTrimEnd = true;
}
}
}
if(isSet(parser, "adapter-relaxed")){
*out << "adapter-relaxed: on" << endl;
o.relaxRegion = true;
}
// adapter-add-barcode is ignored unless all of these conditions hold
if(isSet(parser, "adapter-add-barcode") && o.isPaired && o.a_end == RIGHT && o.rcMode != RCON &&
o.barDetect != BARCODE_READ && o.barDetect != BOFF && o.b_end == LTAIL){
*out << "adapter-add-barcode: on" << endl;
o.addBarcodeAdapter = true;
}
if(isSet(parser, "adapter-trimmed-out")){
string a_trimmed_out;
getOptionValue(a_trimmed_out, parser, "adapter-trimmed-out");
*out << "adapter-trimmed-out: " << a_trimmed_out << endl;
if(a_trimmed_out == "OFF") o.aTrimmed = ATOFF;
else if(a_trimmed_out == "ONLY") o.aTrimmed = ATONLY;
}
// restricting adapter removal to one read set is not available with a second adapter set
if(isSet(parser, "adapter-read-set") && o.isPaired && o.adapRm != NORMAL2){
string a_read_set;
getOptionValue(a_read_set, parser, "adapter-read-set");
*out << "adapter-read-set: " << a_read_set << endl;
if(a_read_set == "1") o.adapRm = AONE;
else if(a_read_set == "2") o.adapRm = ATWO;
}
getOptionValue(o.a_cycles, parser, "adapter-cycles");
if(o.a_cycles < 1){
cerr << "\nNumber of adapter removal cycles should be 1 at least.\n" << endl;
exit(1);
}
// preset NexteraMP enforces at least 3 cycles
if(o.aPreset == NEXTERAMP && o.a_cycles < 3) o.a_cycles = 3;
if(o.a_cycles > 1) *out << "adapter-cycles: " << o.a_cycles << endl;
getOptionValue(o.a_min_overlap, parser, "adapter-min-overlap");
*out << "adapter-min-overlap: " << o.a_min_overlap << endl;
if(o.a_min_overlap < 1){
cerr << "\nAdapter min-overlap should be 1 at least.\n" << endl;
exit(1);
}
if((o.poMode == PON || o.poMode == PSHORT) && o.a_end != RIGHT && o.a_end != RTAIL &&
(! o.useRcTrimEnd || (o.arc_end != RIGHT && o.arc_end != RTAIL))){
cerr << "\nOne adapter trim-end should be RIGHT or RTAIL if pair overlap is ON or SHORT.\n" << endl;
exit(1);
}
// getOptionValue(o.a_overhang, parser, "adapter-overhang");
// *out << "adapter-overhang: " << o.a_overhang << endl;
}
if(o.poMode != POFF){
getOptionValue(o.p_min_overlap, parser, "adapter-min-poverlap");
*out << "adapter-min-poverlap: " << o.p_min_overlap << endl;
if(o.p_min_overlap < 20){
cerr << "\nMinimum overlap of paired reads should be 20 at least.\n" << endl;
exit(1);
}
}
getOptionValue(o.a_errorRate, parser, "adapter-error-rate");
*out << "adapter-error-rate: " << o.a_errorRate << endl;
// accepted range is 0 <= rate < 1
if(o.a_errorRate < 0 || o.a_errorRate >= 1){
cerr << "\nAdapter error rate should be between 0 and 1.\n" << endl;
exit(1);
}
getOptionValue(o.a_match, parser, "adapter-match");
getOptionValue(o.a_mismatch, parser, "adapter-mismatch");
getOptionValue(o.a_gapCost, parser, "adapter-gap");
// non-negative scores get a leading blank in place of the minus sign
*out << "adapter-match: ";
if(o.a_match >= 0) *out << " ";
*out << o.a_match << endl;
*out << "adapter-mismatch: ";
if(o.a_mismatch >= 0) *out << " ";
*out << o.a_mismatch << endl;
*out << "adapter-gap: ";
if(o.a_gapCost >= 0) *out << " ";
*out << o.a_gapCost << "\n" << endl;
}
}
#endif
flexbar-3.5.0/src/PairedAlign.h 0000664 0000000 0000000 00000033073 13464300463 0016272 0 ustar 00root root 0000000 0000000 // PairedAlign.h
#ifndef FLEXBAR_PAIREDALIGN_H
#define FLEXBAR_PAIREDALIGN_H
#include "SeqAlign.h"
#include "SeqAlignPair.h"
#include "SeqAlignAlgo.h"
template
class PairedAlign : public tbb::filter {
private:
const bool m_writeUnassigned, m_twoBarcodes, m_umiTags, m_useRcTrimEnd;
const bool m_htrim, m_htrimAdapterRm, m_htrimMaxFirstOnly, m_addBarcodeAdapter;
const std::string m_htrimLeft, m_htrimRight;
const unsigned int m_htrimMinLength, m_htrimMinLength2, m_htrimMaxLength;
const unsigned int m_arTimes;
const float m_htrimErrorRate;
const flexbar::FileFormat m_format;
const flexbar::LogAlign m_log;
const flexbar::RunType m_runType;
const flexbar::BarcodeDetect m_barType;
const flexbar::AdapterRemoval m_adapRem;
const flexbar::TrimEnd m_aTrimEnd, m_arcTrimEnd, m_bTrimEnd;
const flexbar::PairOverlap m_poMode;
tbb::atomic m_unassigned;
tbb::concurrent_vector *m_adapters, *m_adapters2;
tbb::concurrent_vector *m_barcodes, *m_barcodes2;
typedef SeqAlign > TSeqAlign;
TSeqAlign *m_a1, *m_b1, *m_a2, *m_b2;
typedef SeqAlignPair > TSeqAlignPair;
TSeqAlignPair *m_p;
std::ostream *out;
public:
// Sets up the alignment filter from the run options.
//
// Copies the required settings out of o, keeps pointers to the barcode and
// adapter containers stored in o, and allocates the aligners for barcodes
// (m_b1, m_b2), adapters (m_a1, m_a2) and pair overlap (m_p). When the
// alignment log mode is TAB, the column header is written to the log stream.
//
// The initializers are listed in member declaration order, which is the
// order in which C++ initializes them in any case.
PairedAlign(Options &o) :

	filter(parallel),
	m_writeUnassigned(o.writeUnassigned),
	m_twoBarcodes(o.barDetect == flexbar::WITHIN_READ_REMOVAL2 || o.barDetect == flexbar::WITHIN_READ2),
	m_umiTags(o.umiTags),
	m_useRcTrimEnd(o.useRcTrimEnd),
	m_htrim(o.htrimLeft != "" || o.htrimRight != ""),
	m_htrimAdapterRm(o.htrimAdapterRm),
	m_htrimMaxFirstOnly(o.htrimMaxFirstOnly),
	m_addBarcodeAdapter(o.addBarcodeAdapter),
	m_htrimLeft(o.htrimLeft),
	m_htrimRight(o.htrimRight),
	m_htrimMinLength(o.htrimMinLength),
	m_htrimMinLength2(o.htrimMinLength2),
	m_htrimMaxLength(o.htrimMaxLength),
	m_arTimes(o.a_cycles),
	m_htrimErrorRate(o.h_errorRate),
	m_format(o.format),
	m_log(o.logAlign),
	m_runType(o.runType),
	m_barType(o.barDetect),
	m_adapRem(o.adapRm),
	m_aTrimEnd(o.a_end),
	m_arcTrimEnd(o.arc_end),
	m_bTrimEnd(o.b_end),
	m_poMode(o.poMode),
	m_unassigned(0),
	out(o.out){

	// query sequences stay owned by the options object
	m_barcodes  = &o.barcodes;
	m_barcodes2 = &o.barcodes2;
	m_adapters  = &o.adapters;
	m_adapters2 = &o.adapters2;

	// barcode aligners
	m_b1 = new TSeqAlign(m_barcodes, o, o.b_min_overlap, o.b_errorRate, o.b_tail_len, o.b_match, o.b_mismatch, o.b_gapCost, true);
	m_b2 = new TSeqAlign(m_barcodes2, o, o.b_min_overlap, o.b_errorRate, o.b_tail_len, o.b_match, o.b_mismatch, o.b_gapCost, true);

	// adapter aligners
	m_a1 = new TSeqAlign(m_adapters, o, o.a_min_overlap, o.a_errorRate, o.a_tail_len, o.a_match, o.a_mismatch, o.a_gapCost, false);
	m_a2 = new TSeqAlign(m_adapters2, o, o.a_min_overlap, o.a_errorRate, o.a_tail_len, o.a_match, o.a_mismatch, o.a_gapCost, false);

	// pair overlap aligner, uses the adapter scoring values
	m_p = new TSeqAlignPair(o, o.p_min_overlap, o.a_errorRate, o.a_match, o.a_mismatch, o.a_gapCost);

	if(m_log == flexbar::TAB){
		*out << "ReadTag\tQueryTag\tQueryStart\tQueryEnd\tOverlapLength\tMismatches\tIndels\tAllowedErrors" << std::endl;
	}
}
// Frees the barcode, adapter and pair overlap aligners that were
// allocated in the constructor.
virtual ~PairedAlign(){

	delete m_b1;  delete m_b2;
	delete m_a1;  delete m_a2;
	delete m_p;
}
// Aligns the barcodes to one paired read according to the barcode detection mode
// and stores the returned ids in pRead->barID and pRead->barID2.
//
// pRead     paired read, depending on the mode its b, r1 or r2 read is aligned
// alBundle  alignment containers, index 0 barcode read, 1 first read, 2 second read
// cycle     compute cycle per container, same indexing as alBundle
// idxAl     alignment index per container, same indexing as alBundle
// alMode    alignment mode handed through to alignSeqRead
void alignPairedReadToBarcodes(flexbar::TPairedRead* pRead, flexbar::TAlignBundle &alBundle, std::vector &cycle, std::vector &idxAl, const flexbar::AlignmentMode &alMode){
using namespace flexbar;
// the cases for two barcode sets have no break on purpose: after the second read
// is aligned with m_b2, control falls through to align the first read with m_b1
switch(m_barType){
case BARCODE_READ: pRead->barID = m_b1->alignSeqRead(pRead->b, false, alBundle[0], cycle[0], idxAl[0], alMode, m_bTrimEnd, ""); break;
case WITHIN_READ_REMOVAL2: pRead->barID2 = m_b2->alignSeqRead(pRead->r2, true, alBundle[2], cycle[2], idxAl[2], alMode, m_bTrimEnd, "");
case WITHIN_READ_REMOVAL: pRead->barID = m_b1->alignSeqRead(pRead->r1, true, alBundle[1], cycle[1], idxAl[1], alMode, m_bTrimEnd, ""); break;
case WITHIN_READ2: pRead->barID2 = m_b2->alignSeqRead(pRead->r2, false, alBundle[2], cycle[2], idxAl[2], alMode, m_bTrimEnd, "");
case WITHIN_READ: pRead->barID = m_b1->alignSeqRead(pRead->r1, false, alBundle[1], cycle[1], idxAl[1], alMode, m_bTrimEnd, ""); break;
case BOFF: break;
}
// a barcode id of 0 is counted as unassigned, but not during the PRELOAD cycle;
// only cycle[0] is inspected, the filter operator sets all entries to the same value
if(pRead->barID == 0 || (m_twoBarcodes && pRead->barID2 == 0)){
if(cycle[0] != PRELOAD) m_unassigned++;
}
}
// Aligns the adapters to the reads of one pair.
//
// The first read is skipped if adapter removal is restricted to read set 2
// (ATWO), the second read if it is missing or removal is restricted to read
// set 1 (AONE). With the add-barcode option, the reverse complement of the
// barcode assigned to the mate is handed to the aligner as extra sequence.
// If UMI tags are used, N positions of that barcode are first replaced by
// the characters of the mate's umi string, starting at its second character.
//
// pRead     paired read to process
// alBundle  alignment containers, index 0 first read, 1 second read
// cycle     compute cycle per container, same indexing as alBundle
// idxAl     alignment index per container, same indexing as alBundle
// alMode    alignment mode handed through to alignSeqRead
// trimEnd   trim-end handed through to alignSeqRead
void alignPairedReadToAdapters(flexbar::TPairedRead* pRead, flexbar::TAlignBundle &alBundle, std::vector &cycle, std::vector &idxAl, const flexbar::AlignmentMode &alMode, const flexbar::TrimEnd trimEnd){

	using namespace flexbar;

	// first read
	if(m_adapRem != ATWO){

		TSeqStr mateBarcode = "";

		if(m_addBarcodeAdapter && pRead->r2 != NULL && pRead->barID2 > 0){

			mateBarcode = m_barcodes2->at(pRead->barID2 - 1).seq;

			if(m_umiTags && pRead->r2->umi != ""){
				unsigned int umiIdx = 1;

				for(unsigned int pos = 0; pos < length(mateBarcode); ++pos){
					if(mateBarcode[pos] == 'N' && length(pRead->r2->umi) > umiIdx){
						mateBarcode[pos] = pRead->r2->umi[umiIdx];
						++umiIdx;
					}
				}
			}
			seqan::reverseComplement(mateBarcode);
		}
		m_a1->alignSeqRead(pRead->r1, true, alBundle[0], cycle[0], idxAl[0], alMode, trimEnd, mateBarcode);
	}

	// second read
	if(pRead->r2 != NULL && m_adapRem != AONE){

		TSeqStr mateBarcode = "";

		if(m_addBarcodeAdapter && pRead->barID > 0){

			mateBarcode = m_barcodes->at(pRead->barID - 1).seq;

			if(m_umiTags && pRead->r1->umi != ""){
				unsigned int umiIdx = 1;

				for(unsigned int pos = 0; pos < length(mateBarcode); ++pos){
					if(mateBarcode[pos] == 'N' && length(pRead->r1->umi) > umiIdx){
						mateBarcode[pos] = pRead->r1->umi[umiIdx];
						++umiIdx;
					}
				}
			}
			seqan::reverseComplement(mateBarcode);
		}

		// the second adapter set is only used in mode NORMAL2
		TSeqAlign *aligner = (m_adapRem == NORMAL2) ? m_a2 : m_a1;

		aligner->alignSeqRead(pRead->r2, true, alBundle[1], cycle[1], idxAl[1], alMode, trimEnd, mateBarcode);
	}
}
// Trims homopolymer stretches from the left end of a read.
//
// For each nucleotide in m_htrimLeft, in the given order, the longest prefix
// is determined that ends with this nucleotide and whose number of other
// characters stays within m_htrimErrorRate times the prefix length. The
// prefix is erased from seq (and qual for fastq) if it has the minimum
// length, which is m_htrimMinLength2 for all but the first nucleotide when
// that value is positive, and m_htrimMinLength otherwise.
//
// With the htrim-adapter option nothing is trimmed unless one of the read's
// rmAdapter or rmAdapterRC flags is set, and nothing is trimmed on the left
// if the relevant adapter trim-end is RIGHT or RTAIL.
void trimLeftHPS(flexbar::TSeqRead* seqRead){

	using namespace std;
	using namespace flexbar;

	if(m_htrimAdapterRm){

		const bool adapterAtRight   = (m_aTrimEnd   == RIGHT || m_aTrimEnd   == RTAIL);
		const bool adapterRcAtRight = (m_arcTrimEnd == RIGHT || m_arcTrimEnd == RTAIL);

		if(m_useRcTrimEnd){
			if(seqRead->rmAdapter   && adapterAtRight)   return;
			if(seqRead->rmAdapterRC && adapterRcAtRight) return;
		}
		else if(adapterAtRight) return;

		if(! seqRead->rmAdapter && ! seqRead->rmAdapterRC) return;
	}

	for(unsigned int s = 0; s < m_htrimLeft.length(); ++s){

		const char nuc = m_htrimLeft[s];

		// the maximum length may be restricted to the first nucleotide
		const bool limitLen = m_htrimMaxLength != 0 && (! m_htrimMaxFirstOnly || s == 0);

		unsigned int cutPos     = 0;
		unsigned int mismatches = 0;

		for(unsigned int i = 0; i < length(seqRead->seq); ++i){

			const unsigned int prefixLen = i + 1;

			if(seqRead->seq[i] != nuc){
				++mismatches;
				continue;
			}
			if(mismatches <= m_htrimErrorRate * prefixLen){
				if(limitLen && prefixLen > m_htrimMaxLength) break;
				cutPos = prefixLen;
			}
		}

		const unsigned int minLen = (m_htrimMinLength2 > 0 && s > 0) ? m_htrimMinLength2 : m_htrimMinLength;

		if(cutPos > 0 && cutPos >= minLen){
			erase(seqRead->seq, 0, cutPos);

			if(m_format == FASTQ) erase(seqRead->qual, 0, cutPos);
		}
	}
}
// Trims homopolymer stretches from the right end of a read.
//
// For each nucleotide in m_htrimRight, in the given order, the longest suffix
// is determined that starts with this nucleotide and whose number of other
// characters stays within m_htrimErrorRate times the suffix length. The
// suffix is erased from seq (and qual for fastq) if it has the minimum
// length, which is m_htrimMinLength2 for all but the first nucleotide when
// that value is positive, and m_htrimMinLength otherwise.
//
// With the htrim-adapter option nothing is trimmed unless one of the read's
// rmAdapter or rmAdapterRC flags is set, and nothing is trimmed on the right
// if the relevant adapter trim-end is LEFT or LTAIL.
//
// All length checks are done on the suffix length, mirroring trimLeftHPS.
// The former checks subtracted from the unsigned read length and wrapped
// around for reads shorter than the maximum or minimum length: such reads
// were either never trimmed (max length) or trimmed although the stretch
// was below the minimum length.
void trimRightHPS(flexbar::TSeqRead* seqRead){

	using namespace std;
	using namespace flexbar;

	if(m_htrimAdapterRm && m_useRcTrimEnd){
		if     (seqRead->rmAdapter   && (m_aTrimEnd   == LEFT || m_aTrimEnd   == LTAIL)) return;
		else if(seqRead->rmAdapterRC && (m_arcTrimEnd == LEFT || m_arcTrimEnd == LTAIL)) return;
	}
	else if(m_htrimAdapterRm && ! m_useRcTrimEnd){
		if(m_aTrimEnd == LEFT || m_aTrimEnd == LTAIL) return;
	}

	if(! m_htrimAdapterRm || seqRead->rmAdapter || seqRead->rmAdapterRC){

		for(unsigned int s = 0; s < m_htrimRight.length(); ++s){

			char nuc = m_htrimRight[s];

			unsigned int seqLen = length(seqRead->seq);
			unsigned int cutLen = 0;   // number of characters to remove from the end
			unsigned int notNuc = 0;

			// tailLen is the length of the suffix that starts at position seqLen - tailLen
			for(unsigned int tailLen = 1; tailLen <= seqLen; ++tailLen){

				if(seqRead->seq[seqLen - tailLen] != nuc){
					notNuc++;
				}
				else if(notNuc <= m_htrimErrorRate * tailLen){

					// compare lengths directly, seqLen - m_htrimMaxLength could wrap around
					if(m_htrimMaxLength != 0 && tailLen > m_htrimMaxLength && (! m_htrimMaxFirstOnly || s == 0)) break;

					cutLen = tailLen;
				}
			}

			unsigned int htrimMinLength = m_htrimMinLength;

			if(m_htrimMinLength2 > 0 && s > 0) htrimMinLength = m_htrimMinLength2;

			// compare lengths directly, seqLen - htrimMinLength could wrap around
			if(cutLen > 0 && cutLen >= htrimMinLength){

				unsigned int cutPos = seqLen - cutLen;

				erase(seqRead->seq, cutPos, length(seqRead->seq));

				if(m_format == FASTQ){
					erase(seqRead->qual, cutPos, length(seqRead->qual));
				}
			}
		}
	}
}
// tbb filter operator
//
// Processes one bundle of paired reads: barcode detection, pair overlap
// detection, adapter removal, appending of UMI tags to the read ids and
// homopolymer trimming, each only if enabled. The reads are modified in place.
//
// item  pointer to a TPairedReadBundle, or NULL
// returns the same bundle, or NULL if item is NULL
void* operator()(void* item){
using namespace flexbar;
if(item != NULL){
TPairedReadBundle *prBundle = static_cast(item);
// start with empty UMI strings for every read
if(m_umiTags){
for(unsigned int i = 0; i < prBundle->size(); ++i){
prBundle->at(i)->r1->umi = "";
if(prBundle->at(i)->r2 != NULL)
prBundle->at(i)->r2->umi = "";
}
}
AlignmentMode alMode = ALIGNALL;
// barcode detection
if(m_barType != BOFF){
// three alignment containers: index 0 barcode read, 1 first read, 2 second read
TAlignBundle alBundle;
Alignments r1AlignmentsB, r2AlignmentsB, bAlignmentsB;
alBundle.push_back(bAlignmentsB);
alBundle.push_back(r1AlignmentsB);
alBundle.push_back(r2AlignmentsB);
std::vector idxAl;
std::vector cycle;
for(unsigned int i = 0; i < 3; ++i){
idxAl.push_back(0);
cycle.push_back(PRELOAD);
}
// the bundle is passed twice, first in cycle PRELOAD, then in cycle COMPUTE
// NOTE(review): presumably the first pass collects the sequences and the second
// evaluates the alignments, confirm in SeqAlign
for(unsigned int i = 0; i < prBundle->size(); ++i){
alignPairedReadToBarcodes(prBundle->at(i), alBundle, cycle, idxAl, alMode);
}
for(unsigned int i = 0; i < 3; ++i){
idxAl[i] = 0;
cycle[i] = COMPUTE;
}
for(unsigned int i = 0; i < prBundle->size(); ++i){
alignPairedReadToBarcodes(prBundle->at(i), alBundle, cycle, idxAl, alMode);
}
}
// adapter removal
// pair overlap detection comes first, again with a PRELOAD and a COMPUTE pass
if(m_poMode != POFF){
Alignments alignments;
unsigned int idxAl = 0;
ComputeCycle cycle = PRELOAD;
for(unsigned int i = 0; i < prBundle->size(); ++i){
m_p->alignSeqReadPair(prBundle->at(i)->r1, prBundle->at(i)->r2, alignments, cycle, idxAl);
}
idxAl = 0;
cycle = COMPUTE;
for(unsigned int i = 0; i < prBundle->size(); ++i){
m_p->alignSeqReadPair(prBundle->at(i)->r1, prBundle->at(i)->r2, alignments, cycle, idxAl);
}
}
if(m_adapRem != AOFF){
// adapter removal is repeated m_arTimes times
for(unsigned int c = 0; c < m_arTimes; ++c){
flexbar::TrimEnd trimEnd = m_aTrimEnd;
unsigned int rc = 1;
// with a separate reverse complement trim-end each cycle consists of two rounds:
// mode ALIGNRCOFF with m_aTrimEnd, then mode ALIGNRC with m_arcTrimEnd
if(m_useRcTrimEnd){
alMode = ALIGNRCOFF;
rc = 2;
}
for(unsigned int r = 0; r < rc; ++r){
if(m_useRcTrimEnd && r == 1){
alMode = ALIGNRC;
trimEnd = m_arcTrimEnd;
}
// two alignment containers: index 0 first read, 1 second read
TAlignBundle alBundle;
Alignments r1AlignmentsA, r2AlignmentsA;
alBundle.push_back(r1AlignmentsA);
alBundle.push_back(r2AlignmentsA);
std::vector idxAl;
std::vector cycle;
for(unsigned int i = 0; i < 2; ++i){
idxAl.push_back(0);
cycle.push_back(PRELOAD);
}
for(unsigned int i = 0; i < prBundle->size(); ++i){
alignPairedReadToAdapters(prBundle->at(i), alBundle, cycle, idxAl, alMode, trimEnd);
}
for(unsigned int i = 0; i < 2; ++i){
idxAl[i] = 0;
cycle[i] = COMPUTE;
}
for(unsigned int i = 0; i < prBundle->size(); ++i){
alignPairedReadToAdapters(prBundle->at(i), alBundle, cycle, idxAl, alMode, trimEnd);
}
}
}
}
// for pairs, both read ids receive the UMI of the first read followed by the UMI of the second read
if(m_umiTags){
for(unsigned int i = 0; i < prBundle->size(); ++i){
append(prBundle->at(i)->r1->id, prBundle->at(i)->r1->umi);
if(prBundle->at(i)->r2 != NULL){
append(prBundle->at(i)->r1->id, prBundle->at(i)->r2->umi);
append(prBundle->at(i)->r2->id, prBundle->at(i)->r1->umi);
append(prBundle->at(i)->r2->id, prBundle->at(i)->r2->umi);
}
}
}
// homopolymer trimming, left side of all reads before the right side
if(m_htrim){
if(m_htrimLeft != ""){
for(unsigned int i = 0; i < prBundle->size(); ++i){
trimLeftHPS(prBundle->at(i)->r1);
if(prBundle->at(i)->r2 != NULL)
trimLeftHPS(prBundle->at(i)->r2);
}
}
if(m_htrimRight != ""){
for(unsigned int i = 0; i < prBundle->size(); ++i){
trimRightHPS(prBundle->at(i)->r1);
if(prBundle->at(i)->r2 != NULL)
trimRightHPS(prBundle->at(i)->r2);
}
}
}
return prBundle;
}
else return NULL;
}
unsigned long getNrUnassignedReads() const {
using namespace flexbar;
if(m_runType == PAIRED_BARCODED) return m_unassigned * 2;
else return m_unassigned;
}
unsigned long getNrPreShortReads() const {
using namespace flexbar;
if (m_poMode != POFF) return m_p->getNrPreShortReads();
else if(m_adapRem != NORMAL2) return m_a1->getNrPreShortReads();
else return m_a1->getNrPreShortReads() + m_a2->getNrPreShortReads();
}
void printPairOverlapStats(){
using namespace flexbar;
if(m_p->getNrOverlappingReads() > 0)
*out << m_p->getOverlapStatsString() << "\n\n";
if(m_adapRem == AOFF) *out << std::endl;
}
void printAdapterOverlapStats(){
using namespace flexbar;
if(m_a1->getNrModifiedReads() > 0)
*out << m_a1->getOverlapStatsString() << "\n\n";
if(m_adapRem != NORMAL2) *out << std::endl;
}
// Writes the overlap statistics of the second adapter aligner to the log
// stream if it modified reads, always followed by a line break.
void printAdapterOverlapStats2(){

	const bool modifiedReads = m_a2->getNrModifiedReads() > 0;

	if(modifiedReads){
		*out << m_a2->getOverlapStatsString() << "\n\n";
	}
	*out << std::endl;
}
};
#endif
flexbar-3.5.0/src/PairedInput.h 0000664 0000000 0000000 00000015317 13464300463 0016340 0 ustar 00root root 0000000 0000000 // PairedInput.h
#ifndef FLEXBAR_PAIREDINPUT_H
#define FLEXBAR_PAIREDINPUT_H
#include "SeqInput.h"
template
class PairedInput : public tbb::filter {
private:
const flexbar::FileFormat m_format;
const bool m_isPaired, m_useBarRead, m_useNumberTag, m_interleaved;
const unsigned int m_bundleSize;
tbb::atomic m_uncalled, m_uncalledPairs, m_tagCounter, m_nBundles;
SeqInput *m_f1, *m_f2, *m_b;
public:
// Sets up the input filter from the run options and opens the inputs.
//
// The first reads input is always created. A second reads input is created
// for paired runs that are not interleaved, a barcode reads input if
// barcodes are detected in separate barcode reads. A positive bundle limit
// is increased by one, as loadPairedReadBundle stops when the counter
// has reached 1.
//
// The initializers are listed in member declaration order, which is the
// order in which C++ initializes them in any case.
PairedInput(const Options &o) :

	filter(serial_in_order),
	m_format(o.format),
	m_isPaired(o.isPaired),
	m_useBarRead(o.barDetect == flexbar::BARCODE_READ),
	m_useNumberTag(o.useNumberTag),
	m_interleaved(o.interleavedInput),
	m_bundleSize(o.bundleSize),
	m_uncalled(0),
	m_uncalledPairs(0),
	m_tagCounter(0),
	m_nBundles(o.nBundles){

	m_f1 = new SeqInput(o, o.readsFile, true, o.useStdin);
	m_f2 = NULL;
	m_b  = NULL;

	if(m_isPaired && ! m_interleaved){
		m_f2 = new SeqInput(o, o.readsFile2, true, false);
	}
	if(m_useBarRead){
		m_b = new SeqInput(o, o.barReadsFile, false, false);
	}

	if(m_nBundles > 0) ++m_nBundles;
}
// Frees the input objects. The second reads input and the barcode reads
// input may be NULL, which delete accepts.
virtual ~PairedInput(){

	delete m_f1;  delete m_f2;
	delete m_b;
}
// Loads the next bundle of reads from the input files and pairs them up.
//
// Returns a newly allocated TPairedReadBundle, or NULL if no reads are left
// or the bundle limit is reached. The returned bundle can be empty, since
// fragments with a read flagged as uncalled are counted and left out.
// Inconsistent input (odd number of interleaved reads, differing numbers of
// reads, mates or barcode reads) prints an error to cerr and calls exit(1).
void* loadPairedReadBundle(){
using namespace std;
using namespace flexbar;
TSeqStrs seqs, seqs2, seqsBR;
TStrings ids, ids2, idsBR;
TStrings quals, quals2, qualsBR;
TBools uncalled, uncalled2, uncalledBR;
// countdown of the bundle limit, a value of 0 means no limit;
// the constructor increased a positive limit by one, so stopping at 1 yields the requested number
if(m_nBundles > 0){
if(m_nBundles-- == 1) return NULL;
}
unsigned int bundleSize = m_bundleSize;
// interleaved input holds two reads per fragment
if(m_interleaved) bundleSize = m_bundleSize * 2;
unsigned int nReads = m_f1->loadSeqReads(uncalled, ids, seqs, quals, bundleSize);
if(m_interleaved && nReads % 2 == 1){
cerr << "\nERROR: Interleaved reads input does not contain even number of reads.\n" << endl;
exit(1);
}
if(m_isPaired && ! m_interleaved){
unsigned int nReads2 = m_f2->loadSeqReads(uncalled2, ids2, seqs2, quals2, m_bundleSize);
if(nReads != nReads2){
cerr << "\nERROR: Read without counterpart in paired input mode.\n" << endl;
exit(1);
}
}
if(m_useBarRead){
unsigned int nBarReads = m_b->loadSeqReads(uncalledBR, idsBR, seqsBR, qualsBR, m_bundleSize);
// one barcode read is expected per fragment, thus per two interleaved reads
unsigned int multi = 1;
if(m_interleaved) multi = 2;
if(nReads > nBarReads * multi){
cerr << "\nERROR: Read without barcode read in input.\n" << endl;
exit(1);
}
else if(nReads < nBarReads * multi){
cerr << "\nERROR: Barcode read without read in input.\n" << endl;
exit(1);
}
}
if(nReads == 0) return NULL;
TPairedReadBundle *prBundle = new TPairedReadBundle();
if(! m_interleaved){
for(unsigned int i = 0; i < length(ids); ++i){
// fragments with an uncalled read are only counted, not added to the bundle
if(uncalled[i] || (m_isPaired && uncalled2[i])){
if(uncalled[i]) ++m_uncalled;
if(m_isPaired && uncalled2[i]) ++m_uncalled;
if(m_isPaired) ++m_uncalledPairs;
}
// else if(m_useBarRead && uncalledBR[i]){
//
// // to be handled
// }
else{
// number tags replace the read ids by a running counter shared by all reads of a fragment
if(m_useNumberTag){
stringstream converter;
converter << ++m_tagCounter;
TString tagCount = converter.str();
ids[i] = tagCount;
if(m_isPaired) ids2[i] = tagCount;
if(m_useBarRead) idsBR[i] = tagCount;
}
TSeqRead *read1 = NULL, *read2 = NULL, *barRead = NULL;
// fasta reads are created without quality string
if(m_format == FASTA){
read1 = new TSeqRead(seqs[i], ids[i]);
if(m_isPaired) read2 = new TSeqRead(seqs2[i], ids2[i]);
if(m_useBarRead) barRead = new TSeqRead(seqsBR[i], idsBR[i]);
}
else{
read1 = new TSeqRead(seqs[i], ids[i], quals[i]);
if(m_isPaired) read2 = new TSeqRead(seqs2[i], ids2[i], quals2[i]);
if(m_useBarRead) barRead = new TSeqRead(seqsBR[i], idsBR[i], qualsBR[i]);
}
prBundle->push_back(new TPairedRead(read1, read2, barRead));
}
}
}
else{ // interleaved paired input
unsigned int nEntries = (unsigned int) (length(ids) / 2);
for(unsigned int i = 0; i < nEntries; ++i){
// r and p index the first and second read of fragment i, barcode reads are indexed by i
unsigned int r = (i * 2);
unsigned int p = (i * 2) + 1;
if(uncalled[r] || uncalled[p]){
if(uncalled[r]) ++m_uncalled;
if(uncalled[p]) ++m_uncalled;
++m_uncalledPairs;
}
// else if(m_useBarRead && uncalledBR[i]){
//
// // to be handled
// }
else{
if(m_useNumberTag){
stringstream converter;
converter << ++m_tagCounter;
TString tagCount = converter.str();
ids[r] = tagCount;
ids[p] = tagCount;
if(m_useBarRead) idsBR[i] = tagCount;
}
TSeqRead *read1 = NULL, *read2 = NULL, *barRead = NULL;
if(m_format == FASTA){
read1 = new TSeqRead(seqs[r], ids[r]);
read2 = new TSeqRead(seqs[p], ids[p]);
if(m_useBarRead) barRead = new TSeqRead(seqsBR[i], idsBR[i]);
}
else{
read1 = new TSeqRead(seqs[r], ids[r], quals[r]);
read2 = new TSeqRead(seqs[p], ids[p], quals[p]);
if(m_useBarRead) barRead = new TSeqRead(seqsBR[i], idsBR[i], qualsBR[i]);
}
prBundle->push_back(new TPairedRead(read1, read2, barRead));
}
}
}
return prBundle;
}
// tbb filter operator
// Pipeline entry point: returns the next bundle that actually contains reads.
// A bundle that comes back empty is freed and loading is retried; as soon as
// loadPairedReadBundle() yields NULL, NULL is returned.
void* operator()(void*){
    using namespace flexbar;

    TPairedReadBundle *bundle = static_cast< TPairedReadBundle* >(loadPairedReadBundle());

    while(bundle != NULL && bundle->size() == 0){
        delete bundle;
        bundle = static_cast< TPairedReadBundle* >(loadPairedReadBundle());
    }
    return bundle;
}
// virtual
// No-op: the body is empty, so nothing is done with item.
void finalize(void* item){
}
// Returns the current value of the m_uncalled counter.
unsigned long getNrUncalledReads() const{
return m_uncalled;
}
// Returns the current value of the m_uncalledPairs counter.
unsigned long getNrUncalledPairedReads() const{
return m_uncalledPairs;
}
// Processed-read count reported by m_f1, plus the count of m_f2 when paired
// reads are read from two separate (non-interleaved) inputs.
unsigned long getNrProcessedReads() const{
    unsigned long total = m_f1->getNrProcessedReads();
    if(m_isPaired && ! m_interleaved) total += m_f2->getNrProcessedReads();
    return total;
}
// Processed-character count reported by m_f1, plus the count of m_f2 when
// paired reads are read from two separate (non-interleaved) inputs.
unsigned long getNrProcessedChars() const{
    unsigned long total = m_f1->getNrProcessedChars();
    if(m_isPaired && ! m_interleaved) total += m_f2->getNrProcessedChars();
    return total;
}
// Low-phred read count reported by m_f1, plus the count of m_f2 when paired
// reads are read from two separate (non-interleaved) inputs.
unsigned long getNrLowPhredReads() const {
    unsigned long total = m_f1->getNrLowPhredReads();
    if(m_isPaired && ! m_interleaved) total += m_f2->getNrLowPhredReads();
    return total;
}
};
#endif
flexbar-3.5.0/src/PairedOutput.h 0000664 0000000 0000000 00000033067 13464300463 0016543 0 ustar 00root root 0000000 0000000 // PairedOutput.h
#ifndef FLEXBAR_PAIREDOUTPUT_H
#define FLEXBAR_PAIREDOUTPUT_H
#include "SeqOutput.h"
#include "SeqOutputFiles.h"
#include "QualTrimming.h"
template
class PairedOutput : public tbb::filter {
private:
int m_mapsize;
const int m_minLength, m_qtrimThresh, m_qtrimWinSize;
const bool m_isPaired, m_writeUnassigned, m_writeSingleReads, m_writeSingleReadsP;
const bool m_twoBarcodes, m_qtrimPostRm;
tbb::atomic m_nSingleReads, m_nLowPhred;
const std::string m_target;
const flexbar::FileFormat m_format;
const flexbar::RunType m_runType;
const flexbar::BarcodeDetect m_barDetect;
const flexbar::QualTrimType m_qtrim;
const flexbar::AdapterTrimmed m_aTrimmed;
typedef SeqOutput TSeqOutput;
typedef SeqOutputFiles TOutFiles;
TOutFiles *m_outMap;
std::ostream *out;
tbb::concurrent_vector *m_adapters, *m_barcodes;
tbb::concurrent_vector *m_adapters2, *m_barcodes2;
public:
// Sets up the output stage: copies the relevant settings from Options and
// creates one TOutFiles entry per output slot in m_outMap.
// Slot layout: in barcoded run types slot 0 is the "unassigned" slot (it only
// receives files when m_writeUnassigned is set) and barcode i lives in slot
// i + 1; in non-barcoded run types slot 0 is the only slot.
PairedOutput(Options &o) :
filter(serial_in_order),
m_target(o.targetName),
m_format(o.format),
m_runType(o.runType),
m_barDetect(o.barDetect),
m_minLength(o.min_readLen),
m_qtrim(o.qTrim),
m_qtrimThresh(o.qtrimThresh),
m_qtrimWinSize(o.qtrimWinSize),
m_qtrimPostRm(o.qtrimPostRm),
m_aTrimmed(o.aTrimmed),
m_isPaired(o.isPaired),
m_writeUnassigned(o.writeUnassigned),
m_writeSingleReads(o.writeSingleReads),
m_writeSingleReadsP(o.writeSingleReadsP),
// two barcode sets are in play for the two WITHIN_READ*2 detection modes
m_twoBarcodes(o.barDetect == flexbar::WITHIN_READ_REMOVAL2 || o.barDetect == flexbar::WITHIN_READ2),
out(o.out){
using namespace std;
using namespace flexbar;
m_barcodes = &o.barcodes;
m_barcodes2 = &o.barcodes2;
m_adapters = &o.adapters;
m_adapters2 = &o.adapters2;
m_mapsize = 0;
m_nSingleReads = 0;
m_nLowPhred = 0;
switch(m_runType){
case PAIRED_BARCODED:{
// one slot per barcode, or per (barcode, barcode2) combination
int nBarcodes = m_barcodes->size();
if(m_twoBarcodes) nBarcodes *= m_barcodes2->size();
m_mapsize = nBarcodes + 1;
m_outMap = new TOutFiles[m_mapsize];
for(int i = 0; i < nBarcodes; ++i){
// decompose the flat index: first-set barcode varies fastest
int idxB1 = i % m_barcodes->size();
int idxB2 = div(i, m_barcodes->size()).quot;
TString barcode = m_barcodes->at(idxB1).id;
if(m_twoBarcodes){
append(barcode, "-");
append(barcode, m_barcodes2->at(idxB2).id);
}
TString barcode1 = barcode;
TString barcode2 = barcode;
append(barcode1, "_1");
append(barcode2, "_2");
stringstream b1, b2;
b1 << barcode1;
b2 << barcode2;
string s = m_target + "_barcode_" + b1.str();
TSeqOutput *of1 = new TSeqOutput(s, barcode, false, o);
s = m_target + "_barcode_" + b2.str();
TSeqOutput *of2 = new TSeqOutput(s, barcode, false, o);
TOutFiles& f = m_outMap[i + 1];
f.f1 = of1;
f.f2 = of2;
if(m_writeSingleReads){
s = m_target + "_barcode_" + b1.str() + "_single";
TSeqOutput *osingle1 = new TSeqOutput(s, "", true, o);
s = m_target + "_barcode_" + b2.str() + "_single";
TSeqOutput *osingle2 = new TSeqOutput(s, "", true, o);
f.single1 = osingle1;
f.single2 = osingle2;
}
}
// slot 0 only gets files when unassigned reads are to be written
if(m_writeUnassigned){
string s = m_target + "_barcode_unassigned_1";
TSeqOutput *of1 = new TSeqOutput(s, "unassigned", false, o);
s = m_target + "_barcode_unassigned_2";
TSeqOutput *of2 = new TSeqOutput(s, "unassigned", false, o);
TOutFiles& f = m_outMap[0];
f.f1 = of1;
f.f2 = of2;
if(m_writeSingleReads){
s = m_target + "_barcode_unassigned_1_single";
TSeqOutput *osingle1 = new TSeqOutput(s, "", true, o);
s = m_target + "_barcode_unassigned_2_single";
TSeqOutput *osingle2 = new TSeqOutput(s, "", true, o);
f.single1 = osingle1;
f.single2 = osingle2;
}
}
break;
}
case PAIRED:{
m_mapsize = 1;
m_outMap = new TOutFiles[m_mapsize];
// explicit output file names from the options override the target-based names
string s = m_target + "_1";
if(o.outReadsFile != "") s = o.outReadsFile;
TSeqOutput *of1 = new TSeqOutput(s, "", false, o);
s = m_target + "_2";
if(o.outReadsFile2 != "") s = o.outReadsFile2;
TSeqOutput *of2 = new TSeqOutput(s, "", false, o);
TOutFiles& f = m_outMap[0];
f.f1 = of1;
f.f2 = of2;
if(m_writeSingleReads){
s = m_target + "_1_single";
TSeqOutput *osingle1 = new TSeqOutput(s, "", true, o);
s = m_target + "_2_single";
TSeqOutput *osingle2 = new TSeqOutput(s, "", true, o);
f.single1 = osingle1;
f.single2 = osingle2;
}
break;
}
case SINGLE:{
m_mapsize = 1;
m_outMap = new TOutFiles[m_mapsize];
string s = m_target;
if(o.outReadsFile != "") s = o.outReadsFile;
TSeqOutput *of1 = new TSeqOutput(s, "", false, o);
TOutFiles& f = m_outMap[0];
f.f1 = of1;
break;
}
case SINGLE_BARCODED:{
m_mapsize = m_barcodes->size() + 1;
m_outMap = new TOutFiles[m_mapsize];
for(int i = 0; i < m_barcodes->size(); ++i){
TString barcode = m_barcodes->at(i).id;
stringstream b;
b << barcode;
string s = m_target + "_barcode_" + b.str();
TSeqOutput *of1 = new TSeqOutput(s, barcode, false, o);
TOutFiles& f = m_outMap[i + 1];
f.f1 = of1;
}
if(m_writeUnassigned){
string s = m_target + "_barcode_unassigned";
TSeqOutput *of1 = new TSeqOutput(s, "unassigned", false, o);
TOutFiles& f = m_outMap[0];
f.f1 = of1;
}
// last case of the switch, so the missing break has no effect
}
}
}
// Releases the slot array allocated in the constructor.
// NOTE(review): the TSeqOutput objects stored in each slot are presumably
// freed by the TOutFiles destructor — confirm in SeqOutputFiles.h.
virtual ~PairedOutput(){
delete[] m_outMap;
};
// Filters one (paired) read and writes it to the output slot selected by its
// barcode id. Filtering steps, in order: optional quality trimming after
// removal (m_qtrimPostRm), minimum length check (short reads are counted per
// slot), and the adapter-trimmed filter (ATOFF drops trimmed reads, ATONLY
// drops untrimmed ones). Reads with barcode id 0 are only written in
// non-barcoded run types or when m_writeUnassigned is set.
void writePairedRead(flexbar::TPairedRead* pRead){
using namespace flexbar;
bool r1ok = false, r2ok = false;
switch(m_runType){
case SINGLE:
case SINGLE_BARCODED:{
if(pRead->r1 != NULL){
if(m_runType == SINGLE || m_writeUnassigned || pRead->barID > 0){
if(m_qtrim != QOFF && m_qtrimPostRm){
if(qualTrim(pRead->r1, m_qtrim, m_qtrimThresh, m_qtrimWinSize)) ++m_nLowPhred;
}
if(length(pRead->r1->seq) >= m_minLength) r1ok = true;
else m_outMap[pRead->barID].m_nShort_1++;
// the adapter filter can still reject a read that is long enough
if (m_aTrimmed == ATOFF && (pRead->r1->rmAdapter || pRead->r1->rmAdapterRC)) r1ok = false;
else if(m_aTrimmed == ATONLY && ! pRead->r1->rmAdapter && ! pRead->r1->rmAdapterRC) r1ok = false;
if(r1ok) m_outMap[pRead->barID].f1->writeRead(pRead->r1);
}
}
break;
}
case PAIRED:
case PAIRED_BARCODED:{
if(pRead->r1 != NULL && pRead->r2 != NULL){
int outIdx = pRead->barID;
if(m_twoBarcodes){
// a pair is unassigned (slot 0) unless both barcodes were detected
if(outIdx == 0 || pRead->barID2 == 0){
outIdx = 0;
}
// same flat layout as in the constructor: first barcode varies fastest
else outIdx += (pRead->barID2 - 1) * m_barcodes->size();
}
if(m_runType == PAIRED || m_writeUnassigned || outIdx > 0){
if(m_qtrim != QOFF && m_qtrimPostRm){
if(qualTrim(pRead->r1, m_qtrim, m_qtrimThresh, m_qtrimWinSize)) ++m_nLowPhred;
if(qualTrim(pRead->r2, m_qtrim, m_qtrimThresh, m_qtrimWinSize)) ++m_nLowPhred;
}
if(length(pRead->r1->seq) >= m_minLength) r1ok = true;
if(length(pRead->r2->seq) >= m_minLength) r2ok = true;
if(! r1ok) m_outMap[outIdx].m_nShort_1++;
if(! r2ok) m_outMap[outIdx].m_nShort_2++;
// in paired mode a pair-overlap removal (poRemoval) also counts as trimmed
if (m_aTrimmed == ATOFF && (pRead->r1->rmAdapter || pRead->r1->rmAdapterRC || pRead->r1->poRemoval)) r1ok = false;
else if(m_aTrimmed == ATONLY && ! pRead->r1->rmAdapter && ! pRead->r1->rmAdapterRC && ! pRead->r1->poRemoval) r1ok = false;
if (m_aTrimmed == ATOFF && (pRead->r2->rmAdapter || pRead->r2->rmAdapterRC || pRead->r2->poRemoval)) r2ok = false;
else if(m_aTrimmed == ATONLY && ! pRead->r2->rmAdapter && ! pRead->r2->rmAdapterRC && ! pRead->r2->poRemoval) r2ok = false;
if(r1ok && r2ok){
m_outMap[outIdx].f1->writeRead(pRead->r1);
m_outMap[outIdx].f2->writeRead(pRead->r2);
}
// only read 1 survived: count it, then either write it to the single-read
// file or keep the pair by replacing read 2 with a one-base "N" placeholder
else if(r1ok && ! r2ok){
m_nSingleReads++;
if(m_writeSingleReads){
m_outMap[outIdx].single1->writeRead(pRead->r1);
}
else if(m_writeSingleReadsP){
pRead->r2->seq = "N";
// placeholder quality: first quality character of the surviving mate
if(m_format == FASTQ)
pRead->r2->qual = prefix(pRead->r1->qual, 1);
m_outMap[outIdx].f1->writeRead(pRead->r1);
m_outMap[outIdx].f2->writeRead(pRead->r2);
}
}
// mirror case: only read 2 survived
else if(! r1ok && r2ok){
m_nSingleReads++;
if(m_writeSingleReads){
m_outMap[outIdx].single2->writeRead(pRead->r2);
}
else if(m_writeSingleReadsP){
pRead->r1->seq = "N";
if(m_format == FASTQ)
pRead->r1->qual = prefix(pRead->r2->qual, 1);
m_outMap[outIdx].f1->writeRead(pRead->r1);
m_outMap[outIdx].f2->writeRead(pRead->r2);
}
}
}
}
}
}
}
// tbb filter operator
// Consumes one bundle: every paired read is written via writePairedRead() and
// then freed, and finally the bundle itself is freed. Always returns NULL.
void* operator()(void* item){
    using namespace flexbar;

    if(item == NULL) return NULL;

    TPairedReadBundle *bundle = static_cast< TPairedReadBundle* >(item);

    for(unsigned int idx = 0; idx < bundle->size(); ++idx){
        TPairedRead *pRead = bundle->at(idx);
        writePairedRead(pRead);
        delete pRead;
    }
    delete bundle;

    return NULL;
}
void writeLengthDist(){
for(unsigned int i = 0; i < m_mapsize; i++){
m_outMap[i].f1->writeLengthDist();
if(m_outMap[i].f2 != NULL)
m_outMap[i].f2->writeLengthDist();
}
}
// Returns m_nSingleReads, which writePairedRead() increments whenever exactly
// one read of a pair passes the filters.
unsigned long getNrSingleReads() const {
return m_nSingleReads;
}
// Returns m_nLowPhred, which writePairedRead() increments for each read that
// qualTrim() reports as trimmed.
unsigned long getNrLowPhredReads() const {
return m_nLowPhred;
}
unsigned long getNrGoodReads(){
using namespace flexbar;
unsigned long nGood = 0;
for(unsigned int i = 0; i < m_mapsize; i++){
if(m_barDetect == BOFF || m_writeUnassigned || i > 0){
nGood += m_outMap[i].f1->getNrGoodReads();
if(m_outMap[i].f2 != NULL){
nGood += m_outMap[i].f2->getNrGoodReads();
if(m_writeSingleReads){
nGood += m_outMap[i].single1->getNrGoodReads();
nGood += m_outMap[i].single2->getNrGoodReads();
}
}
}
}
return nGood;
}
unsigned long getNrGoodChars(){
using namespace flexbar;
unsigned long nGood = 0;
for(unsigned int i = 0; i < m_mapsize; i++){
if(m_barDetect == BOFF || m_writeUnassigned || i > 0){
nGood += m_outMap[i].f1->getNrGoodChars();
if(m_outMap[i].f2 != NULL){
nGood += m_outMap[i].f2->getNrGoodChars();
if(m_writeSingleReads){
nGood += m_outMap[i].single1->getNrGoodChars();
nGood += m_outMap[i].single2->getNrGoodChars();
}
}
}
}
return nGood;
}
// Sums the per-slot short-read counters (m_nShort_1, plus m_nShort_2 in paired
// mode) over all slots in use. Slot 0 is left out in barcoded runs that do not
// write unassigned reads.
unsigned long getNrShortReads(){
    using namespace flexbar;

    unsigned long total = 0;

    for(unsigned int idx = 0; idx < m_mapsize; ++idx){
        const bool slotInUse = (m_barDetect == BOFF || m_writeUnassigned || idx > 0);
        if(! slotInUse) continue;

        total += m_outMap[idx].m_nShort_1;
        if(m_isPaired) total += m_outMap[idx].m_nShort_2;
    }
    return total;
}
// Prints a three-column table to *out: adapter id, number of overlap removals
// (rmOverlap) and number of full-length removals (rmFull), one row per adapter.
// secondSet selects m_adapters2 (header "Adapter2", no title block) instead of
// m_adapters (title block plus header "Adapter").
void printAdapterRemovalStats(const bool secondSet){
using namespace std;
tbb::concurrent_vector *adapters;
// column width used to align the table
const unsigned int maxSpaceLen = 20;
// width already taken by the header label plus ':' ("Adapter:" = 8)
int startLen = 8;
if(secondSet){
adapters = m_adapters2;
*out << "Adapter2";
startLen++;
}
else{
adapters = m_adapters;
*out << "Adapter removal statistics\n";
*out << "==========================\n";
*out << "Adapter";
}
// 16 is the length of "Overlap removal:"
*out << ":" << string(maxSpaceLen - startLen, ' ') << "Overlap removal:"
<< string(maxSpaceLen - 16, ' ') << "Full length:\n";
for(unsigned int i = 0; i < adapters->size(); i++){
TString seqTag = adapters->at(i).id;
// pad to the column width, but always keep at least two spaces
int wsLen = maxSpaceLen - length(seqTag);
if(wsLen < 2) wsLen = 2;
string whiteSpace = string(wsLen, ' ');
unsigned long nAdapOvl = adapters->at(i).rmOverlap;
unsigned long nAdapFull = adapters->at(i).rmFull;
// render the count to text only to measure its printed width
stringstream s; s << nAdapOvl;
int wsLen2 = maxSpaceLen - s.str().length();
if(wsLen2 < 2) wsLen2 = 2;
string whiteSpace2 = string(wsLen2, ' ');
*out << seqTag << whiteSpace << nAdapOvl << whiteSpace2 << nAdapFull << "\n";
}
*out << endl;
}
// Prints the removal statistics for the first adapter set (m_adapters).
void printAdapterRemovalStats(){
printAdapterRemovalStats(false);
}
// Prints the removal statistics for the second adapter set (m_adapters2).
void printAdapterRemovalStats2(){
printAdapterRemovalStats(true);
}
void printFileSummary(){
using namespace std;
using namespace flexbar;
*out << "Output file statistics\n";
*out << "======================\n";
for(unsigned int i = 0; i < m_mapsize; i++){
if(m_barDetect == BOFF || m_writeUnassigned || i > 0){
*out << "Read file: " << m_outMap[i].f1->getFileName() << "\n";
*out << " written reads " << m_outMap[i].f1->getNrGoodReads() << "\n";
*out << " short reads " << m_outMap[i].m_nShort_1 << "\n";
if(m_isPaired){
*out << "Read file 2: " << m_outMap[i].f2->getFileName() << "\n";
*out << " written reads " << m_outMap[i].f2->getNrGoodReads() << "\n";
*out << " short reads " << m_outMap[i].m_nShort_2 << "\n";
if(m_writeSingleReads){
*out << "Single read file: " << m_outMap[i].single1->getFileName() << "\n";
*out << " written reads " << m_outMap[i].single1->getNrGoodReads() << "\n";
*out << "Single read file 2: " << m_outMap[i].single2->getFileName() << "\n";
*out << " written reads " << m_outMap[i].single2->getNrGoodReads() << "\n";
}
}
*out << endl;
}
}
*out << endl;
}
};
#endif
flexbar-3.5.0/src/QualTrimming.h 0000664 0000000 0000000 00000005712 13464300463 0016523 0 ustar 00root root 0000000 0000000 // QualTrimming.h
// Authors: Sebastian Roskosch
// Benjamin Menkuec
// Johannes Roehr
#ifndef FLEXBAR_QUALTRIMMING_H
#define FLEXBAR_QUALTRIMMING_H
// Tag types used to select a qualTrimming() overload.
// Tail: tail trimming; BWA: BWA-style trimming.
struct Tail {};
struct BWA {};
// Window: sliding-window trimming; carries the window size.
struct Window {
unsigned size;
Window(unsigned s) : size(s) {}
};
// Returns the quality value at position i of qual, converted via static_cast.
// No bounds check is done here; callers must pass i < length(qual).
template
inline unsigned getQuality(const TString& qual, unsigned i){
return static_cast(qual[i]);
}
// Tail trimming: drops trailing positions whose quality is below cutoff.
// Returns the number of leading positions to keep, i.e. one past the last
// position with quality >= cutoff, or 0 if no position reaches the cutoff.
template
unsigned qualTrimming(const TString& qual, unsigned const cutoff, Tail const &){
    unsigned keep = length(qual);

    while(keep > 0 && getQuality(qual, keep - 1) < cutoff) --keep;

    return keep;
}
// Window trimming: slides a window over the qualities and cuts where the mean
// quality in the window first drops below _cutoff.
// Returns the start of the first window that turned bad, or length(qual) if
// none did.
// The window is clamped to the read length so that the initial sum never
// reads past the end of qual when the read is shorter than the window.
template
unsigned qualTrimming(const TString& qual, unsigned const _cutoff, Window const & spec){
    const unsigned len = length(qual);

    unsigned window = spec.size;
    if(window > len) window = len;

    unsigned avg = 0, i = 0;

    // Absolute cutoff in window to avoid divisions
    unsigned cutoff = _cutoff * window;

    // Sum of qualities in the initial window
    for(i = 0; i < window; ++i){
        avg += getQuality(qual, i);
    }

    // Shift window over read and keep the quality sum, update in constant time
    for(i = 0; i < len && avg >= cutoff; ++i){
        avg -= getQuality(qual, i);

        if(i + window < len){
            avg += getQuality(qual, i + window);
        }
        else{
            // near the end the window shrinks; scale the cutoff to the
            // number of positions still inside it
            cutoff = _cutoff * ((len - 1) - i);
        }
    }
    return i; // holds start of first window that turned bad
}
// Trimming mechanism using BWA. Trim to argmax_x sum_{i=x+1}^l {cutoff - q_i}
// Scans from the last position backwards, accumulating (cutoff - quality), and
// remembers the position where the running sum was largest. Scanning stops as
// soon as the sum becomes negative. Returns the position after the remembered
// one; if the sum never exceeds 0 this is length(qual), so nothing is trimmed.
template
unsigned qualTrimming(const TString& qual, unsigned const cutoff, BWA const &){
int max_arg = length(qual) - 1, sum = 0, max = 0;
for (int i = length(qual) - 1; i >= 0; --i){
// NOTE(review): cutoff and getQuality() are both unsigned, so this difference
// wraps when quality > cutoff; the result only becomes negative once it is
// added into the int sum — confirm this conversion is acceptable on all targets.
sum += cutoff - getQuality(qual, i);
if(sum < 0){
break;
}
if(sum > max){
max = sum;
max_arg = i;
}
}
return max_arg + 1;
}
// Quality-trims seq and qual in place using the method selected by qtrim
// (TAIL, WIN with window size wSize, or BWA) and the given quality cutoff.
// Returns true if the read was shortened, false otherwise.
// cutPos starts at length(qual), so a qtrim value that matches none of the
// three methods leaves the read untouched instead of reading an
// uninitialized cut position.
template
bool qualTrim(TSeqStr &seq, TString &qual, const flexbar::QualTrimType qtrim, const int cutoff, const int wSize){
    using namespace seqan;

    unsigned cutPos = length(qual);

    if(qtrim == flexbar::TAIL){
        cutPos = qualTrimming(qual, cutoff, Tail());
    }
    else if(qtrim == flexbar::WIN){
        cutPos = qualTrimming(qual, cutoff, Window(wSize));
    }
    else if(qtrim == flexbar::BWA){
        cutPos = qualTrimming(qual, cutoff, BWA());
    }

    if(cutPos < length(qual)){
        seq = prefix(seq, cutPos);
        qual = prefix(qual, cutPos);
        return true;
    }
    else return false;
}
// Quality-trims a read object: the trimming runs on copies of the read's
// sequence and quality string, which are only written back when something was
// actually cut off. Returns true if the read was shortened.
template
bool qualTrim(SeqRead *seqRead, const flexbar::QualTrimType qtrim, const int cutoff, const int wSize){
    TSeqStr trimmedSeq = seqRead->seq;
    TString trimmedQual = seqRead->qual;

    if(! qualTrim(trimmedSeq, trimmedQual, qtrim, cutoff, wSize)) return false;

    seqRead->seq = trimmedSeq;
    seqRead->qual = trimmedQual;
    return true;
}
#endif
flexbar-3.5.0/src/SeqAlign.h 0000664 0000000 0000000 00000024066 13464300463 0015620 0 ustar 00root root 0000000 0000000 // SeqAlign.h
#ifndef FLEXBAR_SEQALIGN_H
#define FLEXBAR_SEQALIGN_H
template
class SeqAlign {
private:
typedef AlignResults TAlignResults;
const flexbar::LogAlign m_log;
const flexbar::FileFormat m_format;
const flexbar::PairOverlap m_poMode;
const bool m_isBarcoding, m_writeTag, m_umiTags, m_strictRegion, m_addBarcodeAdapter;
const int m_minLength, m_minOverlap, m_tailLength;
const float m_errorRate;
const unsigned int m_bundleSize;
tbb::atomic m_nPreShortReads, m_modified;
tbb::concurrent_vector *m_queries;
tbb::concurrent_vector m_rmOverlaps;
std::ostream *m_out;
TAlgorithm m_algo;
public:
// Stores the query set (barcodes or adapters) and the alignment settings.
// minOverlap, errorRate and tailLength are passed explicitly; the remaining
// settings come from Options. The alignment algorithm object is constructed
// with its last argument set to ! isBarcoding. m_rmOverlaps is sized so that
// it can be indexed by any overlap length from 0 to MAX_READLENGTH.
SeqAlign(tbb::concurrent_vector *queries, const Options &o, int minOverlap, float errorRate, const int tailLength, const int match, const int mismatch, const int gapCost, const bool isBarcoding):
m_minOverlap(minOverlap),
m_errorRate(errorRate),
m_tailLength(tailLength),
m_isBarcoding(isBarcoding),
m_umiTags(o.umiTags),
m_minLength(o.min_readLen),
m_poMode(o.poMode),
m_log(o.logAlign),
m_format(o.format),
m_writeTag(o.useRemovalTag),
m_addBarcodeAdapter(o.addBarcodeAdapter),
m_strictRegion(! o.relaxRegion),
m_bundleSize(o.bundleSize),
m_out(o.out),
m_nPreShortReads(0),
m_modified(0),
m_algo(TAlgorithm(o, match, mismatch, gapCost, ! isBarcoding)){
m_queries = queries;
m_rmOverlaps = tbb::concurrent_vector(flexbar::MAX_READLENGTH + 1, 0);
};
// Aligns one read against all queries (barcodes or adapters) and optionally
// trims it.
//
// The function is called in cycles. In the PRELOAD cycle it only appends one
// alignment object per eligible query to alignments.aset (advancing idxAl) and
// returns 0. In the other cycles it reads the alignment for each eligible
// query through m_algo.alignGlobal(), keeps the best valid one, and, if
// performRemoval is set, cuts the read at the side given by trimEnd.
//
// alMode restricts which queries are used (ALIGNRCOFF skips reverse-complement
// adapters, ALIGNRC uses only those). addBarcode, when non-empty and
// m_addBarcodeAdapter is set in adapter mode, is prepended to every query.
//
// Returns the 1-based index of the best matching query, or 0 if there is no
// valid alignment, the read is empty, or the call was a PRELOAD call.
int alignSeqRead(flexbar::TSeqRead* sr, const bool performRemoval, flexbar::Alignments &alignments, flexbar::ComputeCycle &cycle, unsigned int &idxAl, const flexbar::AlignmentMode &alMode, const flexbar::TrimEnd trimEnd, const TSeqStr &addBarcode){
    using namespace std;
    using namespace flexbar;
    using seqan::prefix;
    using seqan::suffix;

    TSeqRead &seqRead = *sr;
    int readLength = length(seqRead.seq);

    // short reads are only counted here, they are still aligned
    if(! m_isBarcoding && readLength < m_minLength){
        if(cycle != PRELOAD) ++m_nPreShortReads;
    }
    if(readLength < 1) return 0;

    if(cycle == PRELOAD){
        if(idxAl == 0) reserve(alignments.aset, m_bundleSize * m_queries->size());

        for(unsigned int i = 0; i < m_queries->size(); ++i){
            if     (alMode == ALIGNRCOFF &&   m_queries->at(i).rcAdapter) continue;
            else if(alMode == ALIGNRC    && ! m_queries->at(i).rcAdapter) continue;

            TSeqStr *qseq = &m_queries->at(i).seq;
            TSeqStr *rseq = &seqRead.seq;
            TSeqStr tmp, tmpq;

            if(! m_isBarcoding && m_addBarcodeAdapter && addBarcode != ""){
                tmpq = addBarcode;
                append(tmpq, m_queries->at(i).seq);
                qseq = &tmpq;
            }

            // tail modes align against the read's tail only, provided the
            // tail is shorter than the read
            if(trimEnd == LTAIL || trimEnd == RTAIL){
                int tailLength = (m_tailLength > 0) ? m_tailLength : length(*qseq);

                if(tailLength < readLength){
                    if(trimEnd == LTAIL) tmp = prefix(seqRead.seq, tailLength);
                    else                 tmp = suffix(seqRead.seq, readLength - tailLength);
                    rseq = &tmp;
                }
            }

            TAlign align;
            appendValue(alignments.aset, align);
            resize(rows(alignments.aset[idxAl]), 2);
            assignSource(row(alignments.aset[idxAl], 0), *rseq);
            assignSource(row(alignments.aset[idxAl], 1), *qseq);
            ++idxAl;
        }
        return 0;
    }

    TAlignResults am;
    int qIndex = -1;
    int amScore = numeric_limits::min();

    // align each query sequence and store best one
    for(unsigned int i = 0; i < m_queries->size(); ++i){
        if     (alMode == ALIGNRCOFF &&   m_queries->at(i).rcAdapter) continue;
        else if(alMode == ALIGNRC    && ! m_queries->at(i).rcAdapter) continue;

        TAlignResults a;

        // global sequence alignment
        m_algo.alignGlobal(a, alignments, cycle, idxAl++, trimEnd);

        a.queryLength = length(m_queries->at(i).seq);
        if(! m_isBarcoding && m_addBarcodeAdapter && addBarcode != ""){
            a.queryLength += length(addBarcode);
        }
        a.tailLength    = (m_tailLength > 0) ? m_tailLength : a.queryLength;
        a.overlapLength = a.endPos - a.startPos;
        a.allowedErrors = m_errorRate * a.overlapLength;

        float madeErrors = static_cast(a.mismatches + a.gapsR + a.gapsA);
        int minOverlap   = (m_isBarcoding && m_minOverlap == 0) ? a.queryLength : m_minOverlap;

        // reads flagged by pair overlap detection accept any overlap on the right side
        if(! m_isBarcoding && m_poMode == PON && seqRead.pairOverlap &&
           (trimEnd == RIGHT || trimEnd == RTAIL)) minOverlap = 1;

        bool validAl = true;

        if(((trimEnd == RTAIL || trimEnd == RIGHT) && a.startPosA < a.startPosS && m_strictRegion) ||
           ((trimEnd == LTAIL || trimEnd == LEFT)  && a.endPosA   > a.endPosS   && m_strictRegion) ||
           a.overlapLength < 1){
            validAl = false;
        }

        // check if alignment is valid, score max, number of errors and overlap length
        if(validAl && a.score > amScore && madeErrors <= a.allowedErrors && a.overlapLength >= minOverlap){
            am      = a;
            amScore = a.score;
            qIndex  = i;
        }
    }

    stringstream s;

    // valid alignment
    if(qIndex >= 0){
        TrimEnd trEnd = trimEnd;

        // trim read based on alignment
        if(performRemoval){
            if(trEnd == ANY){
                // query covers the whole read: nothing remains
                if(am.startPosA <= am.startPosS && am.endPosS <= am.endPosA){
                    seqRead.seq = "";
                    if(m_format == FASTQ) seqRead.qual = "";
                }
                else if(am.startPosA - am.startPosS >= am.endPosS - am.endPosA){
                    trEnd = RIGHT;
                }
                else trEnd = LEFT;
            }

            switch(trEnd){
                int rCutPos;

                case LTAIL:
                case LEFT:
                    rCutPos = am.endPos;

                    // translate alignment end pos to read idx
                    if(am.startPosS > 0) rCutPos -= am.startPosS;

                    // adjust to inner read gaps
                    rCutPos -= am.gapsR;

                    if(rCutPos > readLength) rCutPos = readLength;

                    erase(seqRead.seq, 0, rCutPos);
                    if(m_format == FASTQ)
                        erase(seqRead.qual, 0, rCutPos);
                    break;

                case RTAIL:
                    // The alignment ran on the read's tail only when the tail
                    // was shorter than the read (see PRELOAD above). Only then
                    // must the cut position be shifted back to coordinates of
                    // the full read; otherwise it already refers to the read.
                    if(am.tailLength < readLength) am.startPos += readLength - am.tailLength;
                    // fall through

                case RIGHT:
                    rCutPos = am.startPos;

                    // skipped restriction
                    if(rCutPos < 0) rCutPos = 0;

                    erase(seqRead.seq, rCutPos, readLength);
                    if(m_format == FASTQ)
                        erase(seqRead.qual, rCutPos, readLength);
                    break;

                case ANY:;
            }

            ++m_modified;

            if(! m_isBarcoding){
                if(! m_queries->at(qIndex).rcAdapter) seqRead.rmAdapter   = true;
                else                                  seqRead.rmAdapterRC = true;
            }

            // count number of removals for each query
            m_queries->at(qIndex).rmOverlap++;

            if(am.overlapLength == am.queryLength)
                m_queries->at(qIndex).rmFull++;

            if(m_writeTag){
                append(seqRead.id, "_Flexbar_removal");

                if(! m_isBarcoding){
                    append(seqRead.id, "_");
                    append(seqRead.id, m_queries->at(qIndex).id);
                }
            }

            // store overlap occurrences
            if(am.overlapLength <= MAX_READLENGTH) m_rmOverlaps.at(am.overlapLength)++;
            else cerr << "\nCompile Flexbar with larger max read length for correct overlap stats.\n" << endl;
        }

        // valid alignment, not neccesarily removal
        if(m_umiTags && am.umiTag != ""){
            append(seqRead.umi, "_");
            append(seqRead.umi, am.umiTag);
        }

        // alignment stats
        if(m_log == ALL || (m_log == MOD && performRemoval)){
            if(performRemoval){
                s << "Sequence removal:";

                if(trEnd == LEFT || trEnd == LTAIL) s << " left side\n";
                else if(trEnd == RIGHT || trEnd == RTAIL) s << " right side\n";
                else s << " any side\n";
            }
            else s << "Sequence detection, no removal:\n";

            s << " query id " << m_queries->at(qIndex).id << "\n"
              << " query pos " << am.startPosA << "-" << am.endPosA << "\n"
              << " read id " << seqRead.id << "\n"
              << " read pos " << am.startPosS << "-" << am.endPosS << "\n"
              << " score " << am.score << "\n"
              << " overlap " << am.overlapLength << "\n"
              << " errors " << am.gapsR + am.gapsA + am.mismatches << "\n"
              << " error threshold " << am.allowedErrors << "\n";

            if(performRemoval){
                s << " remaining read " << seqRead.seq << "\n";

                if(m_format == FASTQ)
                    s << " remaining qual " << seqRead.qual << "\n";
            }
            s << "\n Alignment:\n" << endl << am.alString;
        }
        else if(m_log == TAB){
            s << seqRead.id << "\t" << m_queries->at(qIndex).id << "\t"
              << am.startPosA << "\t" << am.endPosA << "\t" << am.overlapLength << "\t"
              << am.mismatches << "\t" << am.gapsR + am.gapsA << "\t" << am.allowedErrors << endl;
        }
    }
    else if(m_log == ALL){
        s << "Unvalid alignment:" << "\n"
          << "read id " << seqRead.id << "\n"
          << "read seq " << seqRead.seq << "\n\n" << endl;
    }

    *m_out << s.str();

    // 1-based query index, 0 when nothing matched
    return ++qIndex;
}
// Builds the one-line summary "Min, max, mean and median overlap: ..." from
// the histogram of removal overlap lengths in m_rmOverlaps.
//
// The median is the lower median: the smallest length whose cumulative count
// reaches ceil(n / 2). A threshold of n / 2 selects an element too low, e.g.
// 0 for a single recorded value. With an empty histogram all four values are
// reported as 0.
std::string getOverlapStatsString(){
    using namespace std;
    using namespace flexbar;

    unsigned long nValues = 0, medianRank = 0, cumValues = 0, lenSum = 0;
    unsigned int min = 0, max = 0, median = 0, mean = 0;

    for(unsigned int i = 0; i <= MAX_READLENGTH; ++i){
        unsigned long lenCount = m_rmOverlaps.at(i);

        if(lenCount > 0){
            // lengths are visited in ascending order: the first non-empty
            // bin is the minimum, the last one the maximum
            if(nValues == 0) min = i;
            max = i;
        }
        nValues += lenCount;
        lenSum  += lenCount * i;
    }

    if(nValues > 0){
        medianRank = (nValues + 1) / 2;

        for(unsigned int i = 0; i <= MAX_READLENGTH; ++i){
            cumValues += m_rmOverlaps.at(i);

            if(cumValues >= medianRank){
                median = i;
                break;
            }
        }
    }

    if(m_modified > 0) mean = lenSum / m_modified;

    stringstream s;

    s << "Min, max, mean and median overlap: ";
    s << min << " / " << max << " / " << mean << " / " << median;

    return s.str();
}
// Returns m_nPreShortReads, the number of reads that alignSeqRead() found to
// be shorter than m_minLength before alignment (adapter mode only).
unsigned long getNrPreShortReads() const {
return m_nPreShortReads;
}
// Returns m_modified, which alignSeqRead() increments once per performed removal.
unsigned long getNrModifiedReads() const {
return m_modified;
}
};
#endif
flexbar-3.5.0/src/SeqAlignAlgo.h 0000664 0000000 0000000 00000010566 13464300463 0016423 0 ustar 00root root 0000000 0000000 // SeqAlignAlgo.h
#ifndef FLEXBAR_SEQALIGNALGO_H
#define FLEXBAR_SEQALIGNALGO_H
template
class SeqAlignAlgo {
private:
typedef typename seqan::Value::Type TChar;
typedef typename seqan::Row::Type TRow;
typedef typename seqan::Iterator::Type TRowIterator;
typedef AlignResults TAlignResults;
typedef seqan::Score TScoreSimple;
typedef seqan::Score > TScoreMatrix;
// TScoreSimple m_score;
TScoreMatrix m_scoreMatrix;
const bool m_umiTags, m_isAdapterRm;
const flexbar::LogAlign m_log;
public:
// Builds the scoring matrix used for all alignments of this object.
// A pair of characters scores `match` when both are equal, when the second
// character is 'N', or — only if isAdapterRm is set — when the first character
// is 'N'; every other pair scores `mismatch`. Gaps cost gapCost.
// NOTE(review): judging from alignGlobal(), the first character presumably
// belongs to the read and the second to the query — confirm.
SeqAlignAlgo(const Options &o, const int match, const int mismatch, const int gapCost, const bool isAdapterRm):
m_umiTags(o.umiTags),
m_isAdapterRm(isAdapterRm),
m_log(o.logAlign){
using namespace seqan;
// m_score = Score(match, mismatch, gapCost);
m_scoreMatrix = TScoreMatrix(gapCost);
for(unsigned i = 0; i < ValueSize::VALUE; ++i){
for(unsigned j = 0; j < ValueSize::VALUE; ++j){
if(i == j || TChar(j) == 'N' || (TChar(i) == 'N' && isAdapterRm))
setScore(m_scoreMatrix, TChar(i), TChar(j), match);
else setScore(m_scoreMatrix, TChar(i), TChar(j), mismatch);
}
}
// printScoreMatrix(m_scoreMatrix);
};
// Fills `a` with the result of alignment number idxAl in alignments.aset.
// When cycle is COMPUTE, the whole set is aligned in one globalAlignment()
// call first and cycle is switched to RESULTS, so later calls only read the
// stored alignments and scores.
// Results written to `a`: score, start/end view positions of the read row (S)
// and the query row (A), the overlap region [startPos, endPos), the counts of
// gaps and mismatches inside that region, optionally the alignment as text
// (if logging is enabled) and the UMI tag (if m_umiTags is set).
void alignGlobal(TAlignResults &a, flexbar::Alignments &alignments, flexbar::ComputeCycle &cycle, const unsigned int idxAl, const flexbar::TrimEnd trimEnd){
using namespace std;
using namespace seqan;
using namespace flexbar;
// int band1 = overhang;
// int band2 = readLen - minOvl;
// appendValue(alignments.ascores, 0);
// AlignConfig ac;
// alignments.ascores[idxAl] = globalAlignment(alignments.aset[idxAl], m_scoreMatrix, ac, band1, band2);
if(cycle == COMPUTE){
cycle = RESULTS;
// NOTE(review): the three branches read identically in this view; presumably
// the AlignConfig objects differ in template arguments per trim end — confirm
// against the original file.
if(trimEnd == RIGHT || trimEnd == RTAIL){
AlignConfig ac;
alignments.ascores = globalAlignment(alignments.aset, m_scoreMatrix, ac);
}
else if(trimEnd == LEFT || trimEnd == LTAIL){
AlignConfig ac;
alignments.ascores = globalAlignment(alignments.aset, m_scoreMatrix, ac);
}
else{
AlignConfig ac;
alignments.ascores = globalAlignment(alignments.aset, m_scoreMatrix, ac);
}
}
TAlign &align = alignments.aset[idxAl];
a.score = alignments.ascores[idxAl];
// cout << "Score: " << a.score << endl;
// cout << "Align: " << align << endl;
// row 0 holds the read, row 1 the query (see assignSource calls of the callers)
TRow &row1 = row(align, 0);
TRow &row2 = row(align, 1);
a.startPosS = toViewPosition(row1, 0);
a.startPosA = toViewPosition(row2, 0);
a.endPosS = toViewPosition(row1, length(source(row1)));
a.endPosA = toViewPosition(row2, length(source(row2)));
// overlap region: from the later start to the earlier end of the two rows
a.startPos = (a.startPosA > a.startPosS) ? a.startPosA : a.startPosS;
a.endPos = (a.endPosA > a.endPosS) ? a.endPosS : a.endPosA;
// cout << startPosS << endl << startPosA << endl;
// cout << endPosS << endl << endPosA << endl;
if(m_log != NONE){
stringstream s;
s << align;
a.alString = s.str();
}
if(m_umiTags) a.umiTag = "";
TRowIterator it1 = begin(row1);
TRowIterator it2 = begin(row2);
int alPos = 0;
a.gapsR = 0;
a.gapsA = 0;
a.mismatches = 0;
// walk both rows in lockstep; only columns inside the overlap region count
for(; it1 != end(row1); ++it1){
if(a.startPos <= alPos && alPos < a.endPos){
if(isGap(it1)) ++a.gapsR;
else if(isGap(it2)) ++a.gapsA;
// 'N' in the query never counts as mismatch; 'N' in the read only in adapter mode
else if(*it1 != *it2 && *it2 != 'N' && (*it1 != 'N' || ! m_isAdapterRm)) ++a.mismatches;
// read characters aligned to 'N' in the query form the UMI tag
else if(m_umiTags && *it2 == 'N') append(a.umiTag, (TChar) *it1);
}
++alPos;
++it2;
}
// cout << gapsR << endl << gapsA << endl << mismatches << endl;
}
// Debug helper: prints the scoring matrix to cout as a tab-separated table
// with one header row and one row per character of the alphabet.
void printScoreMatrix(TScoreMatrix &scoreMatrix){
    using namespace std;
    using namespace seqan;

    const unsigned alphabetSize = ValueSize::VALUE;

    cout << endl;

    // header row: column characters
    for(unsigned col = 0; col < alphabetSize; ++col){
        cout << "\t" << TChar(col);
    }
    cout << endl;

    for(unsigned rowIdx = 0; rowIdx < alphabetSize; ++rowIdx){
        cout << TChar(rowIdx);

        for(unsigned col = 0; col < alphabetSize; ++col){
            cout << "\t" << score(scoreMatrix, TChar(rowIdx), TChar(col));
        }
        cout << endl;
    }
}
};
#endif
flexbar-3.5.0/src/SeqAlignPair.h 0000664 0000000 0000000 00000015240 13464300463 0016426 0 ustar 00root root 0000000 0000000 // SeqAlignPair.h
#ifndef FLEXBAR_SEQALIGNPAIR_H
#define FLEXBAR_SEQALIGNPAIR_H
template
class SeqAlignPair {
private:
typedef AlignResults TAlignResults;
const flexbar::LogAlign m_log;
const flexbar::FileFormat m_format;
const flexbar::PairOverlap m_poMode;
const bool m_writeTag;
const int m_minLength, m_minOverlap, m_aMinOverlap;
const float m_errorRate;
const unsigned int m_bundleSize;
tbb::atomic m_nPreShortReads, m_overlaps, m_modified;
tbb::concurrent_vector m_overlapLengths;
std::ostream *m_out;
TAlgorithm m_algo;
public:
// Stores the settings for pair overlap detection. minOverlap and errorRate are
// passed explicitly; the adapter minimum overlap and the remaining settings
// come from Options. The alignment algorithm object is constructed with its
// last argument set to true. m_overlapLengths is sized so that it can be
// indexed by any overlap length from 0 to MAX_READLENGTH.
SeqAlignPair(const Options &o, const int minOverlap, const float errorRate, const int match, const int mismatch, const int gapCost):
m_minOverlap(minOverlap),
m_aMinOverlap(o.a_min_overlap),
m_errorRate(errorRate),
m_minLength(o.min_readLen),
m_poMode(o.poMode),
m_log(o.logAlign),
m_format(o.format),
m_writeTag(o.useRemovalTag),
m_bundleSize(o.bundleSize),
m_out(o.out),
m_nPreShortReads(0),
m_overlaps(0),
m_modified(0),
m_algo(TAlgorithm(o, match, mismatch, gapCost, true)){
m_overlapLengths = tbb::concurrent_vector(flexbar::MAX_READLENGTH + 1, 0);
};
// Detects whether the two reads of a pair overlap and, depending on m_poMode,
// trims the overhanging parts.
// In the PRELOAD cycle only the alignment of read 1 against the reverse
// complement of read 2 is appended to alignments.aset. In the other cycles the
// alignment result is evaluated: a valid overlap sets the pairOverlap flag on
// the affected read(s); with PONLY, or with PSHORT when the overhang is
// shorter than m_aMinOverlap, the overhang is erased and poRemoval is set.
void alignSeqReadPair(flexbar::TSeqRead* sr, flexbar::TSeqRead* sr2, flexbar::Alignments &alignments, flexbar::ComputeCycle &cycle, unsigned int &idxAl){
using namespace std;
using namespace flexbar;
TSeqRead &seqRead = *sr;
TSeqRead &seqRead2 = *sr2;
int readLength = length(seqRead.seq);
int readLength2 = length(seqRead2.seq);
// short reads are counted once, outside the preload pass
if(cycle != PRELOAD){
if(readLength < m_minLength) ++m_nPreShortReads;
if(readLength2 < m_minLength) ++m_nPreShortReads;
}
if(readLength < 1 || readLength2 < 1) return;
if(cycle == PRELOAD){
if(idxAl == 0) reserve(alignments.aset, m_bundleSize);
// read 2 is aligned as reverse complement; the stored read is left unchanged
TSeqStr rcSeq2 = seqRead2.seq;
seqan::reverseComplement(rcSeq2);
TAlign align;
appendValue(alignments.aset, align);
resize(rows(alignments.aset[idxAl]), 2);
assignSource(row(alignments.aset[idxAl], 0), seqRead.seq);
assignSource(row(alignments.aset[idxAl], 1), rcSeq2);
++idxAl;
return;
}
TAlignResults a;
m_algo.alignGlobal(a, alignments, cycle, idxAl++, ANY);
a.overlapLength = a.endPos - a.startPos;
a.allowedErrors = m_errorRate * a.overlapLength;
float madeErrors = static_cast(a.mismatches + a.gapsR + a.gapsA);
stringstream s;
// check if alignment is valid, number of errors and overlap length
// (S positions belong to read 1, A positions to the reverse complement of read 2)
if((a.startPosA < a.startPosS || a.endPosA < a.endPosS) && madeErrors <= a.allowedErrors && a.overlapLength >= m_minOverlap){
// read 2 (reverse complement) starts before read 1: read 2 overhangs by startPosS
if(a.startPosA < a.startPosS){
seqRead2.pairOverlap = true;
if(m_poMode == PONLY || (m_poMode == PSHORT && a.startPosS < m_aMinOverlap)){
// the overhang sits at the end of read 2 in its original orientation
unsigned int rCutPos = readLength2 - a.startPosS;
erase(seqRead2.seq, rCutPos, readLength2);
if(m_format == FASTQ)
erase(seqRead2.qual, rCutPos, readLength2);
++m_modified;
seqRead2.poRemoval = true;
if(m_writeTag) append(seqRead2.id, "_Flexbar_removal_PO");
}
}
// read 1 ends after read 2 (reverse complement): read 1 overhangs by endPosS - endPosA
if(a.endPosA < a.endPosS){
seqRead.pairOverlap = true;
if(m_poMode == PONLY || (m_poMode == PSHORT && (a.endPosS - a.endPosA) < m_aMinOverlap)){
unsigned int rCutPos = readLength - (a.endPosS - a.endPosA);
erase(seqRead.seq, rCutPos, readLength);
if(m_format == FASTQ)
erase(seqRead.qual, rCutPos, readLength);
++m_modified;
seqRead.poRemoval = true;
if(m_writeTag) append(seqRead.id, "_Flexbar_removal_PO");
}
}
++m_overlaps;
// store overlap occurrences
if(a.overlapLength <= MAX_READLENGTH) m_overlapLengths.at(a.overlapLength)++;
else cerr << "\nCompile Flexbar with larger max read length for correct overlap stats.\n" << endl;
// alignment stats
// (this block is logged for every detected overlap, whether or not a read was trimmed)
if(m_log == ALL || m_log == MOD){
s << "Sequence removal:\n";
s << " read id " << seqRead.id << "\n"
<< " read pos " << a.startPosS << "-" << a.endPosS << "\n"
<< " read2 id " << seqRead2.id << "\n"
<< " read2 pos " << a.startPosA << "-" << a.endPosA << "\n"
<< " score " << a.score << "\n"
<< " overlap " << a.overlapLength << "\n"
<< " errors " << a.gapsR + a.gapsA + a.mismatches << "\n"
<< " error threshold " << a.allowedErrors << "\n"
<< " remaining read " << seqRead.seq << "\n";
if(m_format == FASTQ)
s << " remaining qual " << seqRead.qual << "\n";
s << " remaining read2 " << seqRead2.seq << "\n";
if(m_format == FASTQ)
s << " remaining qual2 " << seqRead2.qual << "\n";
s << "\n Alignment:\n" << endl << a.alString;
}
else if(m_log == TAB){
s << seqRead.id << "\t" << seqRead2.id << "\t"
<< a.startPosA << "\t" << a.endPosA << "\t" << a.overlapLength << "\t"
<< a.mismatches << "\t" << a.gapsR + a.gapsA << "\t" << a.allowedErrors << endl;
}
}
else if(m_log == ALL){
s << "Unvalid alignment:" << "\n"
<< "read id " << seqRead.id << "\n"
<< "read2 id " << seqRead2.id << "\n\n" << endl;
}
*m_out << s.str();
return;
}
// Builds the summary text for the pair overlap statistics.
//
// The text optionally starts with the number of reads trimmed based on pair
// overlap (only when m_modified > 0), followed by the min, max, mean and median
// overlap length derived from the m_overlapLengths histogram, which is indexed
// by overlap length in the range 0..MAX_READLENGTH.
//
// Returns the formatted statistics as a string (no trailing newline).
std::string getOverlapStatsString(){
	using namespace std;
	using namespace flexbar;

	unsigned long nValues = 0, cumValues = 0, lenSum = 0;
	unsigned int min = 0, max = 0, median = 0, mean = 0;

	// The histogram is scanned in ascending order, so the first non-empty bin is
	// the minimum and the last non-empty bin is the maximum. With an empty
	// histogram all values stay 0 instead of reporting a sentinel as minimum.
	bool foundAny = false;

	for(unsigned int i = 0; i <= MAX_READLENGTH; ++i){
		const unsigned long lenCount = m_overlapLengths.at(i);
		if(lenCount == 0) continue;

		if(! foundAny){
			min      = i;
			foundAny = true;
		}
		max = i;

		nValues += lenCount;
		lenSum  += lenCount * i;
	}

	if(nValues > 0){
		// Rank of the (lower) median, rounded up: with nValues / 2 the rank is too
		// small for odd counts and becomes 0 for a single value, which made the
		// very first bin match regardless of its content.
		const unsigned long medianRank = (nValues + 1) / 2;

		for(unsigned int i = 0; i <= MAX_READLENGTH; ++i){
			cumValues += m_overlapLengths.at(i);

			if(cumValues >= medianRank){
				median = i;
				break;
			}
		}
	}

	// the mean is taken over all counted overlaps, as before
	if(m_overlaps > 0) mean = lenSum / m_overlaps;

	stringstream s;

	if(m_modified > 0){
		s << "Number of trimmed reads based on pair overlap: ";
		s << m_modified << "\n";
	}
	s << "Min, max, mean and median overlap of paired reads: ";
	s << min << " / " << max << " / " << mean << " / " << median;

	return s.str();
}
// Returns the current value of the m_nPreShortReads counter, i.e. how many
// reads were counted as shorter than the minimum length.
unsigned long getNrPreShortReads() const {
	const unsigned long nPreShort = m_nPreShortReads;
	return nPreShort;
}
unsigned long getNrOverlappingReads() const {
return m_overlaps;
}
};
#endif
flexbar-3.5.0/src/SeqInput.h 0000664 0000000 0000000 00000010612 13464300463 0015655 0 ustar 00root root 0000000 0000000 // SeqInput.h
#ifndef FLEXBAR_SEQINPUT_H
#define FLEXBAR_SEQINPUT_H
#include
#include "QualTrimming.h"
template
class SeqInput {
private:
seqan::FlexbarReadsSeqFileIn seqFileIn;
const flexbar::QualTrimType m_qtrim;
const flexbar::FileFormat m_format;
const bool m_preProcess, m_useStdin, m_qtrimPostRm, m_iupacInput;
const int m_maxUncalled, m_preTrimBegin, m_preTrimEnd, m_qtrimThresh, m_qtrimWinSize;
tbb::atomic m_nrReads, m_nrChars, m_nLowPhred;
public:
SeqInput(const Options &o, const std::string filePath, const bool preProcess, const bool useStdin) :
m_preProcess(preProcess),
m_useStdin(useStdin),
m_maxUncalled(o.maxUncalled),
m_preTrimBegin(o.cutLen_begin),
m_preTrimEnd(o.cutLen_end),
m_qtrim(o.qTrim),
m_qtrimThresh(o.qtrimThresh),
m_qtrimWinSize(o.qtrimWinSize),
m_qtrimPostRm(o.qtrimPostRm),
m_iupacInput(o.iupacInput),
m_format(o.format),
m_nrReads(0),
m_nrChars(0),
m_nLowPhred(0){
using namespace std;
if(m_useStdin){
if(! open(seqFileIn, cin)){
cerr << "\nERROR: Could not open input stream.\n" << endl;
exit(1);
}
}
else{
if(! open(seqFileIn, filePath.c_str())){
cerr << "\nERROR: Could not open file " << filePath << "\n" << endl;
exit(1);
}
}
};
virtual ~SeqInput(){
close(seqFileIn);
};
// returns number of read SeqReads
unsigned int loadSeqReads(seqan::StringSet &uncalled, flexbar::TStrings &ids, flexbar::TSeqStrs &seqs, flexbar::TStrings &quals, const unsigned int nReads){
using namespace std;
using namespace flexbar;
using seqan::prefix;
using seqan::suffix;
using seqan::length;
try{
if(! atEnd(seqFileIn)){
reserve(ids, nReads);
reserve(seqs, nReads);
reserve(uncalled, nReads);
if(! m_iupacInput){
if(m_format == FASTA){
readRecords(ids, seqs, seqFileIn, nReads);
}
else{
reserve(quals, nReads);
readRecords(ids, seqs, quals, seqFileIn, nReads);
}
}
else{
seqan::StringSet seqsIupac;
reserve(seqsIupac, nReads);
if(m_format == FASTA){
readRecords(ids, seqsIupac, seqFileIn, nReads);
}
else{
reserve(quals, nReads);
readRecords(ids, seqsIupac, quals, seqFileIn, nReads);
}
seqs = seqsIupac;
}
for(unsigned int i = 0; i < length(ids); ++i){
TString &id = ids[i];
TSeqStr &seq = seqs[i];
if(length(id) < 1){
cerr << "\nERROR: Input read without name.\n" << endl;
close(seqFileIn);
exit(1);
}
if(length(seq) < 1){
cerr << "\nERROR: Input read without sequence.\n" << endl;
close(seqFileIn);
exit(1);
}
m_nrChars += length(seq);
appendValue(uncalled, isUncalledSequence(seq));
if(m_preProcess){
if(m_preTrimBegin > 0 && length(seq) > 1){
int idx = m_preTrimBegin;
if(idx >= length(seq)) idx = length(seq) - 1;
erase(seq, 0, idx);
if(m_format == FASTQ)
erase(quals[i], 0, idx);
}
if(m_preTrimEnd > 0 && length(seq) > 1){
int idx = m_preTrimEnd;
if(idx >= length(seq)) idx = length(seq) - 1;
seq = prefix(seq, length(seq) - idx);
if(m_format == FASTQ)
quals[i] = prefix(quals[i], length(quals[i]) - idx);
}
if(m_qtrim != QOFF && ! m_qtrimPostRm){
if(qualTrim(seq, quals[i], m_qtrim, m_qtrimThresh, m_qtrimWinSize)) ++m_nLowPhred;
}
}
}
m_nrReads += length(ids);
return length(ids);
}
else return 0; // end of file
}
catch(seqan::Exception const &e){
cerr << "\nERROR: " << e.what() << "\nProgram execution aborted.\n" << endl;
close(seqFileIn);
exit(1);
}
}
// returns TRUE if read contains too many uncalled bases
bool isUncalledSequence(TSeqStr &seq){
using namespace seqan;
typename Iterator::Type it, itEnd;
it = begin(seq);
itEnd = end(seq);
int n = 0;
while(it != itEnd){
if(*it == 'N') n++;
++it;
}
return(n > m_maxUncalled);
}
unsigned long getNrLowPhredReads() const {
return m_nLowPhred;
}
unsigned long getNrProcessedReads() const {
return m_nrReads;
}
unsigned long getNrProcessedChars() const {
return m_nrChars;
}
};
#endif
flexbar-3.5.0/src/SeqOutput.h 0000664 0000000 0000000 00000010032 13464300463 0016052 0 ustar 00root root 0000000 0000000 // SeqOutput.h
#ifndef FLEXBAR_SEQOUTPUT_H
#define FLEXBAR_SEQOUTPUT_H
template
class SeqOutput {
private:
seqan::FlexbarReadsSeqFileOut seqFileOut;
std::string m_filePath;
const TString m_tagStr;
const flexbar::FileFormat m_format;
const flexbar::CompressionType m_cmprsType;
const bool m_switch2Fasta, m_writeLenDist, m_useStdout;
const unsigned int m_minLength, m_cutLen_read;
tbb::atomic m_countGood, m_countGoodChars;
tbb::concurrent_vector m_lengthDist;
public:
SeqOutput(const std::string &filePath, const TString tagStr, const bool alwaysFile, const Options &o) :
m_format(o.format),
m_switch2Fasta(o.switch2Fasta),
m_tagStr(tagStr),
m_minLength(o.min_readLen),
m_cutLen_read(o.cutLen_read),
m_writeLenDist(o.writeLengthDist),
m_useStdout(o.useStdout && ! alwaysFile),
m_cmprsType(o.cmprsType),
m_countGood(0),
m_countGoodChars(0){
using namespace std;
using namespace flexbar;
m_filePath = filePath;
if(filePath != o.outReadsFile && filePath != o.outReadsFile2){
if(m_format == FASTA || m_switch2Fasta)
m_filePath += getExtension(FASTA);
else m_filePath += getExtension(FASTQ);
}
m_filePath += o.outCompression;
m_lengthDist = tbb::concurrent_vector(MAX_READLENGTH + 1, 0);
if(m_useStdout){
if(m_format == FASTA || m_switch2Fasta)
setFormat(seqFileOut, seqan::Fasta());
else setFormat(seqFileOut, seqan::Fastq());
if(! open(seqFileOut, cout)){
cerr << "\nERROR: Could not open output stream." << "\n" << endl;
exit(1);
}
}
else{
if(! open(seqFileOut, m_filePath.c_str())){
cerr << "\nERROR: Could not open file " << m_filePath << "\n" << endl;
exit(1);
}
}
};
virtual ~SeqOutput(){
if(! m_useStdout) close(seqFileOut);
};
const std::string getFileName(){
if(! m_useStdout) return m_filePath;
else return "stdout";
}
void writeLengthDist(){
using namespace std;
string fname = m_filePath + ".lengthdist";
fstream lstream;
lstream.open(fname.c_str(), ios::out | ios::binary);
if(! lstream.is_open()){
cerr << "\nERROR: Could not open file " << fname << "\n";
}
else{
lstream << "Readlength\tCount" << "\n";
for (int i = 0; i <= flexbar::MAX_READLENGTH; ++i){
if(m_lengthDist.at(i) > 0)
lstream << i << "\t" << m_lengthDist.at(i) << "\n";
}
lstream.close();
}
}
void writeSeqRead(flexbar::TSeqRead &seqRead){
using namespace std;
using namespace flexbar;
if(m_useStdout && m_tagStr != ""){
append(seqRead.id, "_");
append(seqRead.id, m_tagStr);
}
try{
if(m_format == FASTA || m_switch2Fasta){
writeRecord(seqFileOut, seqRead.id, seqRead.seq);
}
else{
writeRecord(seqFileOut, seqRead.id, seqRead.seq, seqRead.qual);
}
}
catch(seqan::Exception const &e){
cerr << "\nERROR: " << e.what() << "\nProgram execution aborted.\n" << endl;
close(seqFileOut);
exit(1);
}
}
unsigned long getNrGoodReads() const {
return m_countGood;
}
unsigned long getNrGoodChars() const {
return m_countGoodChars;
}
void* writeRead(void* item){
using namespace std;
using namespace flexbar;
if(item){
SeqRead *seqRead = static_cast< SeqRead* >(item);
unsigned int readLength = length(seqRead->seq);
if(m_cutLen_read > 1 && m_cutLen_read >= m_minLength && m_cutLen_read < readLength){
seqRead->seq = prefix(seqRead->seq, m_cutLen_read);
if(m_format == FASTQ)
seqRead->qual = prefix(seqRead->qual, m_cutLen_read);
readLength = m_cutLen_read;
}
m_countGoodChars += readLength;
++m_countGood;
// store read length distribution
if(m_writeLenDist && readLength <= MAX_READLENGTH)
m_lengthDist.at(readLength)++;
else if(m_writeLenDist)
cerr << "\nCompile Flexbar with larger max read length to get correct length dist.\n" << endl;
writeSeqRead(*seqRead);
}
return NULL;
}
};
#endif
flexbar-3.5.0/src/SeqOutputFiles.h 0000664 0000000 0000000 00000001374 13464300463 0017046 0 ustar 00root root 0000000 0000000 // SeqOutputFiles.h
#ifndef FLEXBAR_SEQOUTPUTFILES_H
#define FLEXBAR_SEQOUTPUTFILES_H
#include "SeqOutput.h"
template
class SeqOutputFiles {
public:
typedef SeqOutput TSeqOutput;
TSeqOutput *f1, *f2, *single1, *single2;
tbb::atomic m_nShort_1, m_nShort_2;
SeqOutputFiles() :
f1(0),
f2(0),
single1(0),
single2(0),
m_nShort_1(0),
m_nShort_2(0){
};
virtual ~SeqOutputFiles(){
delete f1;
delete f2;
delete single1;
delete single2;
};
private:
// forbid copying this object to call destructor only once
// (pointing to unique objects)
SeqOutputFiles(SeqOutputFiles&);
SeqOutputFiles& operator =(const SeqOutputFiles& rhs);
};
#endif
flexbar-3.5.0/test/ 0000775 0000000 0000000 00000000000 13464300463 0014124 5 ustar 00root root 0000000 0000000 flexbar-3.5.0/test/adapters.fasta 0000664 0000000 0000000 00000000014 13464300463 0016742 0 ustar 00root root 0000000 0000000 >ad1
CGTCTT
flexbar-3.5.0/test/adapters1.fasta 0000664 0000000 0000000 00000000030 13464300463 0017021 0 ustar 00root root 0000000 0000000 >adapter1
CCCATAAATACAG
flexbar-3.5.0/test/adapters2.fasta 0000664 0000000 0000000 00000000034 13464300463 0017026 0 ustar 00root root 0000000 0000000 >adapter2
CATACATGGCATAGACA
flexbar-3.5.0/test/barcodes.fasta 0000664 0000000 0000000 00000000045 13464300463 0016725 0 ustar 00root root 0000000 0000000 >Barcode1
AAAAAAA
>Barcode2
TCGTTCAG
flexbar-3.5.0/test/barcodes_N.fasta 0000664 0000000 0000000 00000000045 13464300463 0017202 0 ustar 00root root 0000000 0000000 >Barcode1
AANNAAA
>Barcode2
TCGTTCAG
flexbar-3.5.0/test/correct_result_any.fasta 0000664 0000000 0000000 00000002372 13464300463 0021056 0 ustar 00root root 0000000 0000000 >left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C)
TGAGATCGTTCAGTACGGCAATCGTATGC
>left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N)
TGAGATCGTTCAGTACGGCAATTCGTATGC
>left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T)
TGAGATCGTTCAGTACGGCAATCGTATGC
>left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N)
TCACCGGGTGGAAACTAGCCCCCCCCCCCAC
>left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded
GAAAAAAACCCCCCCCCCTTTTTTTTTTTT
>left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded
AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT
>left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain
CATTATACAGAACACAGCAT
>left_tail:shouldnt work - right_tail:shouldnt work - right:discarded
CATTATACAGAACACAGCAT
>left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains
AAAAAATTTTTTAAAAAA
>left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp
TGGAAAAAAAAAAGCCCCAG
>left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp
TGGAAAAAAAAAAAGCCCCAG
flexbar-3.5.0/test/correct_result_any.fastq 0000664 0000000 0000000 00000003062 13464300463 0021073 0 ustar 00root root 0000000 0000000 @left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C)
TGAGATCGTTCAGTACGGCAATCGTATGC
+
BSSMNXUTVX``[````\`___^_^_`_`
@left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N)
TGAGATCGTTCAGTACGGCAATTCGTATGC
+
BSSMNXUTVX``[````\`___^_^_`_`_
@left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T)
TGAGATCGTTCAGTACGGCAATCGTATGC
+
BSSMNXUTVX``[````\`___^_^_`_`
@left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N)
TCACCGGGTGGAAACTAGCCCCCCCCCCCAC
+
BSSMNXUTVX``[````\`___^_^_`_`_`
@left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded
GAAAAAAACCCCCCCCCCTTTTTTTTTTTT
+
UTVX``[````\`___^_^_`_`_``^_^X
@left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded
AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT
+
XUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain
CATTATACAGAACACAGCAT
+
`\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:shouldnt work - right:discarded
CATTATACAGAACACAGCAT
+
``\`___^_^_`_`_``^_^
@left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains
AAAAAATTTTTTAAAAAA
+
`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp
TGGAAAAAAAAAAGCCCCAG
+
BSSMNXUTVX``[````\`_
@left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp
TGGAAAAAAAAAAAGCCCCAG
+
BSSMNXUTVX``[````\`__
flexbar-3.5.0/test/correct_result_left.fasta 0000664 0000000 0000000 00000001377 13464300463 0021225 0 ustar 00root root 0000000 0000000 >left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N)
TCACCGGGTGGAAACTAGCCCCCCCCCCCACCGTCT
>left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded
GAAAAAAACCCCCCCCCCTTTTTTTTTTTT
>left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded
AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT
>left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain
CATTATACAGAACACAGCAT
>left_tail:shouldnt work - right_tail:shouldnt work - right:discarded
CATTATACAGAACACAGCAT
>left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains
AAAAAATTTTTTAAAAAA
>left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp
AAAAAATTTT
flexbar-3.5.0/test/correct_result_left.fastq 0000664 0000000 0000000 00000001671 13464300463 0021242 0 ustar 00root root 0000000 0000000 @left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N)
TCACCGGGTGGAAACTAGCCCCCCCCCCCACCGTCT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded
GAAAAAAACCCCCCCCCCTTTTTTTTTTTT
+
UTVX``[````\`___^_^_`_`_``^_^X
@left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded
AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT
+
XUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain
CATTATACAGAACACAGCAT
+
`\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:shouldnt work - right:discarded
CATTATACAGAACACAGCAT
+
``\`___^_^_`_`_``^_^
@left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains
AAAAAATTTTTTAAAAAA
+
`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp
AAAAAATTTT
+
`_`_``^_^X
flexbar-3.5.0/test/correct_result_left_tail.fasta 0000664 0000000 0000000 00000002663 13464300463 0022235 0 ustar 00root root 0000000 0000000 >left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C)
TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC
>left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N)
TGAGATCGTTCAGTACGGCAATTCGTATGCCGTCTT
>left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T)
TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT
>left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N)
TCACCGGGTGGAAACTAGCCCCCCCCCCCACCGTCT
>left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded
GAAAAAAACCCCCCCCCCTTTTTTTTTTTT
>left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded
AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT
>left:should_work right:discarded! - right_tail:works,discarded
AAAAAAAACGTCTT
>left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain
AAAAAAAAAACGTCTTCATTATACAGAACACAGCAT
>left_tail:shouldnt work - right_tail:shouldnt work - right:discarded
AAAAAAAAACGTCTTCATTATACAGAACACAGCAT
>left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains
TGGAAGCCCCAGCGTCTTAAAAAATTTTTTAAAAAA
>left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp
TGGAAAAAAAAAAGCCCCAGCGTCTTAAAAAATTTT
>left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp
TGGAAAAAAAAAAAGCCCCAGCGTCTTAAAAATTTT
flexbar-3.5.0/test/correct_result_left_tail.fastq 0000664 0000000 0000000 00000003545 13464300463 0022255 0 ustar 00root root 0000000 0000000 @left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C)
TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N)
TGAGATCGTTCAGTACGGCAATTCGTATGCCGTCTT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T)
TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N)
TCACCGGGTGGAAACTAGCCCCCCCCCCCACCGTCT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded
GAAAAAAACCCCCCCCCCTTTTTTTTTTTT
+
UTVX``[````\`___^_^_`_`_``^_^X
@left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded
AAAAAAACCCCCCCCCCTTTTTTTTTTTTTT
+
XUTVX``[````\`___^_^_`_`_``^_^X
@left:should_work right:discarded! - right_tail:works,discarded
AAAAAAAACGTCTT
+
BSSMNXUTVX``[`
@left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain
AAAAAAAAAACGTCTTCATTATACAGAACACAGCAT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:shouldnt work - right:discarded
AAAAAAAAACGTCTTCATTATACAGAACACAGCAT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^
@left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains
TGGAAGCCCCAGCGTCTTAAAAAATTTTTTAAAAAA
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp
TGGAAAAAAAAAAGCCCCAGCGTCTTAAAAAATTTT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp
TGGAAAAAAAAAAAGCCCCAGCGTCTTAAAAATTTT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
flexbar-3.5.0/test/correct_result_right.fasta 0000664 0000000 0000000 00000002221 13464300463 0021375 0 ustar 00root root 0000000 0000000 >left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C)
TGAGATCGTTCAGTACGGCAATCGTATGC
>left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N)
TGAGATCGTTCAGTACGGCAATTCGTATGC
>left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T)
TGAGATCGTTCAGTACGGCAATCGTATGC
>left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N)
TCACCGGGTGGAAACTAGCCCCCCCCCCCAC
>left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded
GTCTTAAAAAAACCCCCCCCCCTTTTTTTTTTTTTT
>left:begin_with_G, discarded - left_tail:should_work,but discarded! - right_tail:shouldnt work - right:discarded
TCTTGAAAAAAAA
>left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain
AAAAAAAAAA
>left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains
TGGAAGCCCCAG
>left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp
TGGAAAAAAAAAAGCCCCAG
>left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp
TGGAAAAAAAAAAAGCCCCAG
flexbar-3.5.0/test/correct_result_right.fastq 0000664 0000000 0000000 00000002626 13464300463 0021426 0 ustar 00root root 0000000 0000000 @left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C)
TGAGATCGTTCAGTACGGCAATCGTATGC
+
BSSMNXUTVX``[````\`___^_^_`_`
@left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N)
TGAGATCGTTCAGTACGGCAATTCGTATGC
+
BSSMNXUTVX``[````\`___^_^_`_`_
@left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T)
TGAGATCGTTCAGTACGGCAATCGTATGC
+
BSSMNXUTVX``[````\`___^_^_`_`
@left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N)
TCACCGGGTGGAAACTAGCCCCCCCCCCCAC
+
BSSMNXUTVX``[````\`___^_^_`_`_`
@left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded
GTCTTAAAAAAACCCCCCCCCCTTTTTTTTTTTTTT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left:begin_with_G, discarded - left_tail:should_work,but discarded! - right_tail:shouldnt work - right:discarded
TCTTGAAAAAAAA
+
BSSMNXUTVX``[
@left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain
AAAAAAAAAA
+
BSSMNXUTVX
@left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains
TGGAAGCCCCAG
+
BSSMNXUTVX``
@left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp
TGGAAAAAAAAAAGCCCCAG
+
BSSMNXUTVX``[````\`_
@left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp
TGGAAAAAAAAAAAGCCCCAG
+
BSSMNXUTVX``[````\`__
flexbar-3.5.0/test/correct_result_right_tail.fasta 0000664 0000000 0000000 00000002743 13464300463 0022417 0 ustar 00root root 0000000 0000000 >left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C)
TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC
>left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N)
TGAGATCGTTCAGTACGGCAATTCGTATGC
>left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T)
TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT
>left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N)
TCACCGGGTGGAAACTAGCCCCCCCCCCCAC
>left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded
CGTCTTGAAAAAAACCCCCCCCCCTTTTTTTTTTTT
>left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded
GTCTTAAAAAAACCCCCCCCCCTTTTTTTTTTTTTT
>left:begin_with_G, discarded - left_tail:should_work,but discarded! - right_tail:shouldnt work - right:discarded
TCTTGAAAAAAAA
>left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain
AAAAAAAAAACGTCTTCATTATACAGAACACAGCAT
>left_tail:shouldnt work - right_tail:shouldnt work - right:discarded
AAAAAAAAACGTCTTCATTATACAGAACACAGCAT
>left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains
TGGAAGCCCCAGCGTCTTAAAAAATTTTTTAAAAAA
>left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp
TGGAAAAAAAAAAGCCCCAGCGTCTTAAAAAATTTT
>left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp
TGGAAAAAAAAAAAGCCCCAGCGTCTTAAAAATTTT
flexbar-3.5.0/test/correct_result_right_tail.fastq 0000664 0000000 0000000 00000003624 13464300463 0022436 0 ustar 00root root 0000000 0000000 @left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C)
TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N)
TGAGATCGTTCAGTACGGCAATTCGTATGC
+
BSSMNXUTVX``[````\`___^_^_`_`_
@left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T)
TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N)
TCACCGGGTGGAAACTAGCCCCCCCCCCCAC
+
BSSMNXUTVX``[````\`___^_^_`_`_`
@left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded
CGTCTTGAAAAAAACCCCCCCCCCTTTTTTTTTTTT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded
GTCTTAAAAAAACCCCCCCCCCTTTTTTTTTTTTTT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left:begin_with_G, discarded - left_tail:should_work,but discarded! - right_tail:shouldnt work - right:discarded
TCTTGAAAAAAAA
+
BSSMNXUTVX``[
@left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain
AAAAAAAAAACGTCTTCATTATACAGAACACAGCAT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:shouldnt work - right:discarded
AAAAAAAAACGTCTTCATTATACAGAACACAGCAT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^
@left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains
TGGAAGCCCCAGCGTCTTAAAAAATTTTTTAAAAAA
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp
TGGAAAAAAAAAAGCCCCAGCGTCTTAAAAAATTTT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp
TGGAAAAAAAAAAAGCCCCAGCGTCTTAAAAATTTT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
flexbar-3.5.0/test/flexbar_test.sh 0000775 0000000 0000000 00000000252 13464300463 0017144 0 ustar 00root root 0000000 0000000 #!/bin/sh -e
# Runs all regression suites one after another: the two read format suites
# first, then the decompression suite.
echo ""

for suite in fasta fastq; do
    echo "Testing ${suite}:"
    "./flexbar_test_${suite}.sh"
done

echo "Testing decompression:"
./flexbar_test_zip.sh
flexbar-3.5.0/test/flexbar_test_fasta.sh 0000775 0000000 0000000 00000003266 13464300463 0020332 0 ustar 00root root 0000000 0000000 #!/bin/sh -e
# Runs flexbar on reads.fasta for one adapter trim-end mode and compares the
# output with the stored reference result.
#   $1  test number printed in the success message
#   $2  mode label; selects result_$2.fasta / correct_result_$2.fasta and is
#       printed in the error message
#   $3  value passed to --adapter-trim-end
run_mode_test() {
    flexbar --reads reads.fasta --target "result_$2" --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-error-rate 0.1 --adapter-trim-end "$3" > /dev/null

    # diff is tested directly as the if condition: there its non-zero status is
    # exempt from "sh -e", so a mismatch reaches the error report below instead
    # of aborting the script silently, and the diff text is never run as a command.
    if ! delta=$(diff "correct_result_$2.fasta" "result_$2.fasta"); then
        echo "Error testing $2 mode fasta"
        echo "$delta"
        exit 1
    fi
    echo "Test $1 OK"
}

run_mode_test 1 right      RIGHT
run_mode_test 2 left       LEFT
run_mode_test 3 any        ANY
run_mode_test 4 left_tail  LTAIL
run_mode_test 5 right_tail RTAIL

echo ""
flexbar-3.5.0/test/flexbar_test_fastq.sh 0000775 0000000 0000000 00000003266 13464300463 0020352 0 ustar 00root root 0000000 0000000 #!/bin/sh -e
# Runs flexbar on reads.fastq for one adapter trim-end mode and compares the
# output with the stored reference result.
#   $1  test number printed in the success message
#   $2  mode label; selects result_$2.fastq / correct_result_$2.fastq and is
#       printed in the error message
#   $3  value passed to --adapter-trim-end
run_mode_test() {
    flexbar --reads reads.fastq --target "result_$2" --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-error-rate 0.1 --adapter-trim-end "$3" > /dev/null

    # diff is tested directly as the if condition: there its non-zero status is
    # exempt from "sh -e", so a mismatch reaches the error report below instead
    # of aborting the script silently, and the diff text is never run as a command.
    if ! delta=$(diff "correct_result_$2.fastq" "result_$2.fastq"); then
        echo "Error testing $2 mode fastq"
        echo "$delta"
        exit 1
    fi
    echo "Test $1 OK"
}

run_mode_test 1 right      RIGHT
run_mode_test 2 left       LEFT
run_mode_test 3 any        ANY
run_mode_test 4 left_tail  LTAIL
run_mode_test 5 right_tail RTAIL

echo ""
flexbar-3.5.0/test/flexbar_test_zip.sh 0000775 0000000 0000000 00000001303 13464300463 0020024 0 ustar 00root root 0000000 0000000 #!/bin/sh -e
# Runs flexbar in RIGHT trim-end mode on a compressed copy of the fastq reads
# and compares the output with the reference result of the uncompressed run.
#   $1  compression suffix; selects reads.fastq.$1 and the target result_$1
#   $2  compression name printed in the messages
run_zip_test() {
    flexbar --reads "reads.fastq.$1" --target "result_$1" --adapter-min-overlap 4 --adapters adapters.fasta --min-read-length 10 --adapter-error-rate 0.1 --adapter-trim-end RIGHT > /dev/null

    # diff is tested directly as the if condition: there its non-zero status is
    # exempt from "sh -e", so a mismatch reaches the error report below instead
    # of aborting the script silently, and the diff text is never run as a command.
    if ! delta=$(diff correct_result_right.fastq "result_$1.fastq"); then
        echo "Error testing right mode $2 fastq"
        echo "$delta"
        exit 1
    fi
    echo "Test $2 OK"
}

run_zip_test gz  gzip
run_zip_test bz2 bzip2

echo ""
flexbar-3.5.0/test/reads.fasta 0000664 0000000 0000000 00000003076 13464300463 0016250 0 ustar 00root root 0000000 0000000 >left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C)
TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC
>left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N)
TGAGATCGTTCAGTACGGCAATTCGTATGCCGTCTT
>left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T)
TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT
>left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N)
TCACCGGGTGGAAACTAGCCCCCCCCCCCACCGTCT
>left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded
CGTCTTGAAAAAAACCCCCCCCCCTTTTTTTTTTTT
>left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded
GTCTTAAAAAAACCCCCCCCCCTTTTTTTTTTTTTT
>left:begin_with_G, discarded - left_tail:should_work,but discarded! - right_tail:shouldnt work - right:discarded
TCTTGAAAAAAAA
>left:should_work right:discarded! - right_tail:works,discarded
AAAAAAAACGTCTT
>left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain
AAAAAAAAAACGTCTTCATTATACAGAACACAGCAT
>left_tail:shouldnt work - right_tail:shouldnt work - right:discarded
AAAAAAAAACGTCTTCATTATACAGAACACAGCAT
>left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains
TGGAAGCCCCAGCGTCTTAAAAAATTTTTTAAAAAA
>left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp
TGGAAAAAAAAAAGCCCCAGCGTCTTAAAAAATTTT
>left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp
TGGAAAAAAAAAAAGCCCCAGCGTCTTAAAAATTTT
flexbar-3.5.0/test/reads.fastq 0000664 0000000 0000000 00000004013 13464300463 0016260 0 ustar 00root root 0000000 0000000 @left_tail:shouldnt work - right_tail:shouldnt work - rigth:end with ATGC - left:discarded(C)
TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTC
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:end with ATGC - right:end_with_ATGC - left:discarded_empty_read(N)
TGAGATCGTTCAGTACGGCAATTCGTATGCCGTCTT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:shouldnt work - right:should_end_with_TGC-left:Discarded(T)
TGAGATCGTTCAGTACGGCAATCGTATGCCGTCTTT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:end with CAC - right:end_with_A_left:discarded_empty_read(N)
TCACCGGGTGGAAACTAGCCCCCCCCCCCACCGTCT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left:begin_with_G-left_tail:should_work(begin with G) - right_tail:shouldnt work - right:discarded
CGTCTTGAAAAAAACCCCCCCCCCTTTTTTTTTTTT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left:begin_with_G-left_tail:should_work(begin with A) - right_tail:shouldnt work - right:discarded
GTCTTAAAAAAACCCCCCCCCCTTTTTTTTTTTTTT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left:begin_with_G, discarded - left_tail:should_work,but discarded! - right_tail:shouldnt work - right:discarded
TCTTGAAAAAAAA
+
BSSMNXUTVX``[
@left:should_work right:discarded! - right_tail:works,discarded
AAAAAAAACGTCTT
+
BSSMNXUTVX``[`
@left_tail:sholdnt_work - right_tail:shouldnt work - right:10bp_remain
AAAAAAAAAACGTCTTCATTATACAGAACACAGCAT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:shouldnt work - right:discarded
AAAAAAAAACGTCTTCATTATACAGAACACAGCAT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^
@left_tail:shouldnt work - right_tail:shouldnt work - both:right_remains
TGGAAGCCCCAGCGTCTTAAAAAATTTTTTAAAAAA
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:10bp
TGGAAAAAAAAAAGCCCCAGCGTCTTAAAAAATTTT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
@left_tail:shouldnt work - right_tail:shouldnt work - both:left_remains-left:Discarded_9bp
TGGAAAAAAAAAAAGCCCCAGCGTCTTAAAAATTTT
+
BSSMNXUTVX``[````\`___^_^_`_`_``^_^X
flexbar-3.5.0/test/reads.fastq.bz2 0000664 0000000 0000000 00000000775 13464300463 0016767 0 ustar 00root root 0000000 0000000 BZh91AY&SY$q ߀`n`0|Mޠ@J`IM $BB& 4 E4 C
4h%!L"z F1)28-F'44nF$x}\Do,p4-VhD&V]NZ$4c9)<(҄ XĮd
zLJ +Ur
H:ji>gIh7cE&%aqAIBM֔,(BBX]]꤄bJD!Jj
t
!;д!\MGR9!J RKJKb:2BEގbwygFc@SC
ʹh*
Ě:U"Lҩ(7!CR AԩCؔP7hoPw`j$HHB$B JuZdj%SR0Cs]B@| flexbar-3.5.0/test/reads.fastq.gz 0000664 0000000 0000000 00000000660 13464300463 0016703 0 ustar 00root root 0000000 0000000 &S