pax_global_header00006660000000000000000000000064135122030070014503gustar00rootroot0000000000000052 comment=4c87d6831e9898dcaf2830182afece85e77b09ce spoa-3.0.1/000077500000000000000000000000001351220300700124465ustar00rootroot00000000000000spoa-3.0.1/.gitignore000066400000000000000000000000371351220300700144360ustar00rootroot00000000000000# Compiled Object files build/ spoa-3.0.1/.gitmodules000066400000000000000000000003111351220300700146160ustar00rootroot00000000000000[submodule "vendor/bioparser"] path = vendor/bioparser url = https://github.com/rvaser/bioparser [submodule "vendor/googletest"] path = vendor/googletest url = https://github.com/google/googletest spoa-3.0.1/.travis.yml000066400000000000000000000020231351220300700145540ustar00rootroot00000000000000language: cpp compiler: - clang - gcc before_install: # cmake 3.2 - sudo add-apt-repository ppa:george-edison55/cmake-3.x -y # g++4.8.1 - if [ "$CXX" == "g++" ]; then sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test; fi # clang 3.4 - if [ "$CXX" == "clang++" ]; then sudo add-apt-repository -y ppa:h-rayflood/llvm; fi - sudo apt-get update -qq install: # cmake 3.2 - sudo apt-get install cmake cmake-data # g++4.8.1 - if [ "$CXX" == "g++" ]; then sudo apt-get install -qq g++-4.8; fi - if [ "$CXX" == "g++" ]; then export CXX="g++-4.8"; fi # clang 3.4 - if [ "$CXX" == "clang++" ]; then sudo apt-get install --allow-unauthenticated -qq clang-3.4; fi - if [ "$CXX" == "clang++" ]; then export CXX="clang++-3.4"; fi script: - mkdir build - cd build - cmake -Dspoa_build_tests=ON -Dspoa_build_executable=ON -DCMAKE_BUILD_TYPE=Release .. 
- make - ./bin/spoa_test notifications: email: on_success: change on_failure: always spoa-3.0.1/CMakeLists.txt000066400000000000000000000052151351220300700152110ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.2) project(spoa LANGUAGES CXX VERSION 3.0.0) include(GNUInstallDirs) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic") set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) option(spoa_build_executable "Build spoa standalone tool" OFF) option(spoa_build_tests "Build spoa unit tests" OFF) option(spoa_optimize_for_native "Buiold spoa with march=native" ON) if (spoa_optimize_for_native) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") endif() # build SPOA as a static library by default set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build all libraries as shared") add_library(spoa src/alignment_engine.cpp src/graph.cpp src/simd_alignment_engine.cpp src/sisd_alignment_engine.cpp) target_include_directories(spoa PUBLIC $ $) set_target_properties(spoa PROPERTIES VERSION ${spoa_VERSION} SOVERSION ${spoa_VERSION}) install(TARGETS spoa DESTINATION ${CMAKE_INSTALL_LIBDIR}) install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/spoa DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) # configure and install pkg-config file configure_file(${CMAKE_CURRENT_SOURCE_DIR}/spoa.pc.in ${CMAKE_CURRENT_BINARY_DIR}/spoa-1.pc @ONLY) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/spoa-1.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) if (spoa_build_executable) add_executable(spoa_bin src/sequence.cpp src/main.cpp) if (NOT TARGET bioparser) add_subdirectory(vendor/bioparser EXCLUDE_FROM_ALL) endif() target_link_libraries(spoa_bin spoa bioparser) set_target_properties(spoa_bin PROPERTIES OUTPUT_NAME spoa) install(TARGETS spoa_bin DESTINATION 
${CMAKE_INSTALL_BINDIR}) endif() if (spoa_build_tests) set(spoa_test_data_path ${PROJECT_SOURCE_DIR}/test/data/) configure_file(${PROJECT_SOURCE_DIR}/test/spoa_test_config.h.in ${PROJECT_BINARY_DIR}/config/spoa_test_config.h) include_directories(${PROJECT_BINARY_DIR}/config) include_directories(${PROJECT_SOURCE_DIR}/src) add_executable(spoa_test src/sequence.cpp test/spoa_test.cpp) if (NOT TARGET bioparser) add_subdirectory(vendor/bioparser EXCLUDE_FROM_ALL) endif() if (NOT TARGET gtest_main) add_subdirectory(vendor/googletest/googletest EXCLUDE_FROM_ALL) endif() target_link_libraries(spoa_test spoa bioparser gtest_main) endif() spoa-3.0.1/LICENSE000066400000000000000000000020671351220300700134600ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2016 Robert Vaser Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
spoa-3.0.1/README.md000066400000000000000000000133361351220300700137330ustar00rootroot00000000000000# Spoa [![Latest GitHub release](https://img.shields.io/github/release/rvaser/spoa.svg)](https://github.com/rvaser/spoa/releases/latest) [![Build status for c++/clang++](https://travis-ci.org/rvaser/spoa.svg?branch=master)](https://travis-ci.org/rvaser/spoa) [![Published in Genome Research](https://img.shields.io/badge/published%20in-Genome%20Research-blue.svg)](https://doi.org/10.1101/gr.214270.116) Spoa (SIMD POA) is a c++ implementation of the partial order alignment (POA) algorithm (as described in 10.1093/bioinformatics/18.3.452) which is used to generate consensus sequences (as described in 10.1093/bioinformatics/btg109). It supports three alignment modes: local (Smith-Waterman), global (Needleman-Wunsch) and semi-global alignment (overlap), and three gap modes: linear, affine and convex (piecewise affine). It supports Intel SSE4.1+ and AVX2 vectorization (marginally faster due to high latency shifts). ## Dependencies ### Linux Application uses following software: 1. gcc 4.8+ or clang 3.4+ 2. cmake 3.2+ ## Installation CmakeLists is provided in the project root folder. By running the following commands: ```bash git clone --recursive https://github.com/rvaser/spoa spoa cd spoa mkdir build cd build cmake -DCMAKE_BUILD_TYPE=Release .. make ``` a library named `libspoa.a` will appear in the `build/lib` directory. If you want the spoa executable, run the following two commands: ```bash cmake -DCMAKE_BUILD_TYPE=Release -Dspoa_build_executable=ON .. make ``` which will place an executable named `spoa` in `build/bin` directory. Optionally, you can run `sudo make install` to install spoa library (and executable) to your machine. ***Note***: if you omitted `--recursive` from `git clone`, run `git submodule init` and `git submodule update` before proceeding with compilation. To build unit tests add `-Dspoa_build_tests=ON` while running `cmake`. 
After installation, an executable named `spoa_test` will be created in `build/bin`. ## Usage Usage of spoa is as following: ```bash spoa [options ...] input file in FASTA/FASTQ format (can be compressed with gzip) containing sequences options: -m default: 5 score for matching bases -n default: -4 score for mismatching bases -g default: -8 gap opening penalty (must be non-positive) -e default: -6 gap extension penalty (must be non-positive) -q default: -10 gap opening penalty of the second affine function (must be non-positive) -c default: -4 gap extension penalty of the second affine function (must be non-positive) -l, --algorithm default: 0 alignment mode: 0 - local (Smith-Waterman) 1 - global (Needleman-Wunsch) 2 - semi-global -r, --result default: 0 result mode: 0 - consensus 1 - multiple sequence alignment 2 - 0 & 1 -d, --dot output file for the final POA graph in DOT format --version prints the version number -h, --help prints the usage gap mode: linear if g >= e affine if g <= q or e >= c convex otherwise (default) ``` ### Library Simple library usage can be seen in the following `example.cpp` file. This code shows how to get consensus and multiple sequence alignment for a set of sequences without quality values. 
```cpp #include "spoa/spoa.hpp" int main(int argc, char** argv) { std::vector sequences = { "CATAAAAGAACGTAGGTCGCCCGTCCGTAACCTGTCGGATCACCGGAAAGGACCCGTAAAGTGATAATGAT", "ATAAAGGCAGTCGCTCTGTAAGCTGTCGATTCACCGGAAAGATGGCGTTACCACGTAAAGTGATAATGATTAT", "ATCAAAGAACGTGTAGCCTGTCCGTAATCTAGCGCATTTCACACGAGACCCGCGTAATGGG", "CGTAAATAGGTAATGATTATCATTACATATCACAACTAGGGCCGTATTAATCATGATATCATCA", "GTCGCTAGAGGCATCGTGAGTCGCTTCCGTACCGCAAGGATGACGAGTCACTTAAAGTGATAAT", "CCGTAACCTTCATCGGATCACCGGAAAGGACCCGTAAATAGACCTGATTATCATCTACAT" }; auto alignment_engine = spoa::createAlignmentEngine(static_cast(atoi(argv[1])), atoi(argv[2]), atoi(argv[3]), atoi(argv[4]), atoi(argv[5])); auto graph = spoa::createGraph(); for (const auto& it: sequences) { auto alignment = alignment_engine->align(it, graph); graph->add_alignment(alignment, it); } std::string consensus = graph->generate_consensus(); fprintf(stderr, "Consensus (%zu)\n", consensus.size()); fprintf(stderr, "%s\n", consensus.c_str()); std::vector msa; graph->generate_multiple_sequence_alignment(msa); fprintf(stderr, "Multiple sequence alignment\n"); for (const auto& it: msa) { fprintf(stderr, "%s\n", it.c_str()); } return 0; } ``` This code can be compiled from spoa root directory with: ```bash g++ example.cpp -std=c++11 -Iinclude/ -Lbuild/lib/ -lspoa -o example ``` or with the following command if spoa was installed beforehand: ```bash g++ example.cpp -std=c++11 -lspoa -o example ``` The executable can be run with: ```bash ./example 0 5 -4 -8 -6 ``` On the other hand, if you are using `cmake` you can add spoa to your project by adding commands `add_subdirectory(vendor/spoa EXCLUDE_FROM_ALL)` and `target_link_libraries(your_exe spoa)` to your main CMakeLists file. ## Contact information For additional information, help and bug reports please send an email to: robert.vaser@fer.hr. ## Acknowledgement This work has been supported in part by Croatian Science Foundation under the project UIP-11-2013-7353. 
spoa-3.0.1/include/000077500000000000000000000000001351220300700140715ustar00rootroot00000000000000spoa-3.0.1/include/spoa/000077500000000000000000000000001351220300700150335ustar00rootroot00000000000000spoa-3.0.1/include/spoa/alignment_engine.hpp000066400000000000000000000034521351220300700210530ustar00rootroot00000000000000/*! * @file alignment_engine.hpp * * @brief AlignmentEngine class header file */ #pragma once #include #include #include #include #include namespace spoa { enum class AlignmentType { kSW, // Smith Waterman kNW, // Needleman Wunsch kOV // Overlap }; enum class AlignmentSubtype { kLinear, kAffine, kConvex }; class Graph; using Alignment = std::vector>; class AlignmentEngine; std::unique_ptr createAlignmentEngine(AlignmentType type, std::int8_t m, std::int8_t n, std::int8_t g); std::unique_ptr createAlignmentEngine(AlignmentType type, std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e); std::unique_ptr createAlignmentEngine(AlignmentType type, std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q, std::int8_t c); class AlignmentEngine { public: virtual ~AlignmentEngine() {} virtual void prealloc(std::uint32_t max_sequence_size, std::uint32_t alphabet_size) = 0; Alignment align(const std::string& sequence, const std::unique_ptr& graph); virtual Alignment align(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept = 0; protected: AlignmentEngine(AlignmentType type, AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q, std::int8_t c); AlignmentEngine(const AlignmentEngine&) = delete; const AlignmentEngine& operator=(const AlignmentEngine&) = delete; AlignmentType type_; AlignmentSubtype subtype_; std::int8_t m_; std::int8_t n_; std::int8_t g_; std::int8_t e_; std::int8_t q_; std::int8_t c_; }; } spoa-3.0.1/include/spoa/graph.hpp000066400000000000000000000125771351220300700166610ustar00rootroot00000000000000/*! 
* @file graph.hpp * * @brief Graph class header file */ #pragma once #include #include #include #include #include #include namespace spoa { class Node; class Edge; class Graph; std::unique_ptr createGraph(); using Alignment = std::vector>; class Graph { public: ~Graph(); const std::vector>& nodes() const { return nodes_; } const std::vector& rank_to_node_id() const { return rank_to_node_id_; } std::uint32_t num_codes() const { return num_codes_; }; std::uint8_t coder(std::uint8_t c) const { return coder_[c]; } std::uint8_t decoder(std::uint8_t code) const { return decoder_[code]; } void add_alignment(const Alignment& alignment, const std::string& sequence, std::uint32_t weight = 1); void add_alignment(const Alignment& alignment, const char* sequence, std::uint32_t sequence_size, std::uint32_t weight = 1); void add_alignment(const Alignment& alignment, const std::string& sequence, const std::string& quality); void add_alignment(const Alignment& alignment, const char* sequence, std::uint32_t sequence_size, const char* quality, std::uint32_t quality_size); void add_alignment(const Alignment& alignment, const std::string& sequence, const std::vector& weights); void add_alignment(const Alignment& alignment, const char* sequence, std::uint32_t sequence_size, const std::vector& weights); void generate_multiple_sequence_alignment(std::vector& dst, bool include_consensus = false); std::string generate_consensus(); // returns base coverages or complete summary matrix if verbose equals true std::string generate_consensus(std::vector& dst, bool verbose = false); std::unique_ptr subgraph(std::uint32_t begin_node_id, std::uint32_t end_node_id, std::vector& subgraph_to_graph_mapping) const; void update_alignment(Alignment& alignment, const std::vector& subgraph_to_graph_mapping) const; void print_dot(const std::string& path) const; void clear(); friend std::unique_ptr createGraph(); private: Graph(); Graph(const Graph&) = delete; const Graph& operator=(const Graph&) = delete; 
static std::unique_ptr createNode(std::uint32_t id, std::uint32_t code); static std::unique_ptr createEdge(std::uint32_t begin_node_id, std::uint32_t end_node_id, std::uint32_t label, std::uint32_t weight); std::uint32_t add_node(std::uint32_t code); void add_edge(std::uint32_t begin_node_id, std::uint32_t end_node_id, std::uint32_t weight); std::int32_t add_sequence(const char* sequence, const std::vector& weights, std::uint32_t begin, std::uint32_t end); void topological_sort(); bool is_topologically_sorted() const; void traverse_heaviest_bundle(); std::uint32_t branch_completion(std::vector& scores, std::vector& predecessors, std::uint32_t rank); void extract_subgraph_nodes(std::vector& dst, std::uint32_t current_node_id, std::uint32_t end_node_id) const; std::uint32_t initialize_multiple_sequence_alignment( std::vector& dst) const; std::uint32_t num_sequences_; std::uint32_t num_codes_; std::vector coder_; std::vector decoder_; std::vector> nodes_; std::vector rank_to_node_id_; std::vector sequences_begin_nodes_ids_; std::vector consensus_; }; class Node { public: ~Node(); std::uint32_t id() const { return id_; } std::uint32_t code() const { return code_; } const std::vector>& in_edges() const { return in_edges_; } const std::vector>& out_edges() const { return out_edges_; } const std::vector& aligned_nodes_ids() const { return aligned_nodes_ids_; } bool successor(std::uint32_t& dst, std::uint32_t label) const; std::uint32_t coverage() const; friend Graph; private: Node(std::uint32_t id, std::uint32_t code); Node(const Node&) = delete; const Node& operator=(const Node&) = delete; std::uint32_t id_; std::uint32_t code_; std::vector> in_edges_; std::vector> out_edges_; std::vector aligned_nodes_ids_; }; class Edge { public: ~Edge(); std::uint32_t begin_node_id() const { return begin_node_id_; } std::uint32_t end_node_id() const { return end_node_id_; } friend Graph; friend Node; private: Edge(std::uint32_t begin_node_id, std::uint32_t end_node_id, std::uint32_t 
label, std::uint32_t weight); Edge(const Edge&) = delete; const Edge& operator=(const Edge&) = delete; void add_sequence(std::uint32_t label, std::uint32_t weight = 1); std::uint32_t begin_node_id_; std::uint32_t end_node_id_; std::vector sequence_labels_; std::int64_t total_weight_; }; } spoa-3.0.1/include/spoa/spoa.hpp000066400000000000000000000002421351220300700165040ustar00rootroot00000000000000/*! * @file spoa.hpp * * @brief spoa header file which encapsulates the implementation */ #pragma once #include "graph.hpp" #include "alignment_engine.hpp" spoa-3.0.1/spoa.pc.in000066400000000000000000000004101351220300700143340ustar00rootroot00000000000000libdir=@CMAKE_INSTALL_FULL_LIBDIR@ includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ Name: Spoa Description: Spoa (SIMD POA) is a C++ implementation of the partial order alignment (POA) algorithm Version: @spoa_VERSION@ Libs: -L${libdir} -lspoa Cflags: -I${includedir} spoa-3.0.1/src/000077500000000000000000000000001351220300700132355ustar00rootroot00000000000000spoa-3.0.1/src/alignment_engine.cpp000066400000000000000000000045541351220300700172540ustar00rootroot00000000000000/*! 
* @file alignment_engine.cpp * * @brief AlignmentEngine class source file */ #include #include #include #include "sisd_alignment_engine.hpp" #include "simd_alignment_engine.hpp" #include "spoa/alignment_engine.hpp" namespace spoa { std::unique_ptr createAlignmentEngine(AlignmentType type, std::int8_t m, std::int8_t n, std::int8_t g) { return createAlignmentEngine(type, m, n, g, g); } std::unique_ptr createAlignmentEngine(AlignmentType type, std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e) { return createAlignmentEngine(type, m, n, g, e, g, e); } std::unique_ptr createAlignmentEngine(AlignmentType type, std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q, std::int8_t c) { if (type != AlignmentType::kSW && type != AlignmentType::kNW && type != AlignmentType::kOV) { throw std::invalid_argument("[spoa::createAlignmentEngine] error: " "invalid alignment type!"); } if (g > 0 || q > 0) { throw std::invalid_argument("[spoa::createAlignmentEngine] error: " "gap opening penalty must be non-positive!"); } if (e > 0 || c > 0) { throw std::invalid_argument("[spoa::createAlignmentEngine] error: " "gap extension penalty must be non-positive!"); } AlignmentSubtype subtype = g >= e ? AlignmentSubtype::kLinear : (g <= q || e >= c ? 
AlignmentSubtype::kAffine : AlignmentSubtype::kConvex); if (subtype == AlignmentSubtype::kLinear) { e = g; } else if (subtype == AlignmentSubtype::kAffine) { q = g; c = e; } auto alignment_engine = createSimdAlignmentEngine(type, subtype, m, n, g, e, q, c); if (alignment_engine == nullptr) { return createSisdAlignmentEngine(type, subtype, m, n, g, e, q, c); } return alignment_engine; } AlignmentEngine::AlignmentEngine(AlignmentType type, AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q, std::int8_t c) : type_(type), subtype_(subtype), m_(m), n_(n), g_(g), e_(e), q_(q), c_(c) { } Alignment AlignmentEngine::align(const std::string& sequence, const std::unique_ptr& graph) { return align(sequence.c_str(), sequence.size(), graph); } } spoa-3.0.1/src/graph.cpp000066400000000000000000000547231351220300700150550ustar00rootroot00000000000000/*! * @file graph.cpp * * @brief Graph class source file */ #include #include #include #include #include "spoa/graph.hpp" namespace spoa { constexpr std::uint32_t kMaxAlphabetSize = 256; std::unique_ptr Graph::createNode(std::uint32_t id, std::uint32_t code) { return std::unique_ptr(new Node(id, code)); } Node::Node(std::uint32_t id, std::uint32_t code) : id_(id), code_(code), in_edges_(), out_edges_(), aligned_nodes_ids_() { } Node::~Node() { } bool Node::successor(std::uint32_t& dst, std::uint32_t label) const { for (const auto& edge: out_edges_) { for (const auto& l: edge->sequence_labels_) { if (l == label) { dst = edge->end_node_id_; return true; } } } return false; } std::uint32_t Node::coverage() const { std::unordered_set label_set; for (const auto& edge: in_edges_) { for (const auto& label: edge->sequence_labels_) { label_set.insert(label); } } for (const auto& edge: out_edges_) { for (const auto& label: edge->sequence_labels_) { label_set.insert(label); } } return label_set.size(); } std::unique_ptr Graph::createEdge(std::uint32_t begin_node_id, std::uint32_t end_node_id, 
std::uint32_t label, std::uint32_t weight) { return std::unique_ptr(new Edge(begin_node_id, end_node_id, label, weight)); } Edge::Edge(std::uint32_t begin_node_id, std::uint32_t end_node_id, std::uint32_t label, std::uint32_t weight) : begin_node_id_(begin_node_id), end_node_id_(end_node_id), sequence_labels_(1, label), total_weight_(weight) { } Edge::~Edge() { } void Edge::add_sequence(std::uint32_t label, std::uint32_t weight) { sequence_labels_.emplace_back(label); total_weight_ += weight; } std::unique_ptr createGraph() { return std::unique_ptr(new Graph()); } Graph::Graph() : num_sequences_(0), num_codes_(0), coder_(kMaxAlphabetSize, -1), decoder_(kMaxAlphabetSize, -1), nodes_(), rank_to_node_id_(), sequences_begin_nodes_ids_(), consensus_() { } Graph::~Graph() { } std::uint32_t Graph::add_node(std::uint32_t code) { std::uint32_t node_id = nodes_.size(); nodes_.emplace_back(createNode(node_id, code)); return node_id; } void Graph::add_edge(std::uint32_t begin_node_id, std::uint32_t end_node_id, std::uint32_t weight) { assert(begin_node_id < nodes_.size() && end_node_id < nodes_.size()); for (const auto& edge: nodes_[begin_node_id]->out_edges_) { if (edge->end_node_id_ == end_node_id) { edge->add_sequence(num_sequences_, weight); return; } } std::shared_ptr edge = createEdge(begin_node_id, end_node_id, num_sequences_, weight); nodes_[begin_node_id]->out_edges_.emplace_back(edge); nodes_[end_node_id]->in_edges_.emplace_back(edge); } void Graph::add_alignment(const Alignment& alignment, const std::string& sequence, std::uint32_t weight) { add_alignment(alignment, sequence.c_str(), sequence.size(), weight); } void Graph::add_alignment(const Alignment& alignment, const char* sequence, std::uint32_t sequence_size, std::uint32_t weight) { std::vector weights(sequence_size, weight); add_alignment(alignment, sequence, sequence_size, weights); } void Graph::add_alignment(const Alignment& alignment, const std::string& sequence, const std::string& quality) { 
add_alignment(alignment, sequence.c_str(), sequence.size(), quality.c_str(), quality.size()); } void Graph::add_alignment(const Alignment& alignment, const char* sequence, std::uint32_t sequence_size, const char* quality, std::uint32_t quality_size) { std::vector weights; for (std::uint32_t i = 0; i < quality_size; ++i) { weights.emplace_back(static_cast(quality[i] - 33)); // PHRED quality } add_alignment(alignment, sequence, sequence_size, weights); } void Graph::add_alignment(const Alignment& alignment, const std::string& sequence, const std::vector& weights) { add_alignment(alignment, sequence.c_str(), sequence.size(), weights); } void Graph::add_alignment(const Alignment& alignment, const char* sequence, std::uint32_t sequence_size, const std::vector& weights) { if (sequence_size == 0) { return; } if (sequence_size != weights.size()) { throw std::invalid_argument("[spoa::Graph::add_alignment] error: " "sequence and weights are of unequal size!"); } for (std::uint32_t i = 0; i < sequence_size; ++i) { auto c = sequence[i]; if (coder_[c] == -1) { coder_[c] = num_codes_; decoder_[num_codes_] = c; ++num_codes_; } } if (alignment.empty()) { // no alignment std::int32_t begin_node_id = add_sequence(sequence, weights, 0, sequence_size); ++num_sequences_; sequences_begin_nodes_ids_.emplace_back(begin_node_id); topological_sort(); return; } std::vector valid_seq_ids; for (const auto& it: alignment) { if (it.second != -1) { valid_seq_ids.emplace_back(it.second); } } assert(valid_seq_ids.front() <= sequence_size); assert(valid_seq_ids.back() + 1 <= sequence_size); std::uint32_t tmp = nodes_.size(); std::int32_t begin_node_id = add_sequence(sequence, weights, 0, valid_seq_ids.front()); std::int32_t head_node_id = tmp == nodes_.size() ? -1 : nodes_.size() - 1; std::int32_t tail_node_id = add_sequence(sequence, weights, valid_seq_ids.back() + 1, sequence_size); std::int32_t new_node_id = -1; float prev_weight = head_node_id == -1 ? 
0 : weights[valid_seq_ids.front() - 1]; for (std::uint32_t i = 0; i < alignment.size(); ++i) { if (alignment[i].second == -1) { continue; } char letter = sequence[alignment[i].second]; if (alignment[i].first == -1) { new_node_id = add_node(coder_[letter]); } else { if (decoder_[nodes_[alignment[i].first]->code_] == letter) { new_node_id = alignment[i].first; } else { std::int32_t aligned_to_node_id = -1; for (const auto& aid: nodes_[alignment[i].first]->aligned_nodes_ids_) { if (decoder_[nodes_[aid]->code_] == letter) { aligned_to_node_id = aid; break; } } if (aligned_to_node_id == -1) { new_node_id = add_node(coder_[letter]); for (const auto& aid: nodes_[alignment[i].first]->aligned_nodes_ids_) { nodes_[new_node_id]->aligned_nodes_ids_.emplace_back(aid); nodes_[aid]->aligned_nodes_ids_.emplace_back(new_node_id); } nodes_[new_node_id]->aligned_nodes_ids_.emplace_back( alignment[i].first); nodes_[alignment[i].first]->aligned_nodes_ids_.emplace_back( new_node_id); } else { new_node_id = aligned_to_node_id; } } } if (begin_node_id == -1) { begin_node_id = new_node_id; } if (head_node_id != -1) { // both nodes contribute to edge weight add_edge(head_node_id, new_node_id, prev_weight + weights[alignment[i].second]); } head_node_id = new_node_id; prev_weight = weights[alignment[i].second]; } if (tail_node_id != -1) { // both nodes contribute to edge weight add_edge(head_node_id, tail_node_id, prev_weight + weights[valid_seq_ids.back() + 1]); } ++num_sequences_; sequences_begin_nodes_ids_.emplace_back(begin_node_id); topological_sort(); } std::int32_t Graph::add_sequence(const char* sequence, const std::vector& weights, std::uint32_t begin, std::uint32_t end) { if (begin == end) { return -1; } std::int32_t first_node_id = add_node(coder_[sequence[begin]]); std::uint32_t node_id; for (std::uint32_t i = begin + 1; i < end; ++i) { node_id = add_node(coder_[sequence[i]]); // both nodes contribute to edge weight add_edge(node_id - 1, node_id, weights[i - 1] + weights[i]); } 
return first_node_id; } void Graph::topological_sort() { rank_to_node_id_.clear(); // 0 - unmarked, 1 - temporarily marked, 2 - permanently marked std::vector node_marks(nodes_.size(), 0); std::vector check_aligned_nodes(nodes_.size(), true); std::stack nodes_to_visit; for (std::uint32_t i = 0; i < nodes_.size(); ++i) { if (node_marks[i] != 0) { continue; } nodes_to_visit.push(i); while (nodes_to_visit.size() != 0) { std::uint32_t node_id = nodes_to_visit.top(); bool valid = true; if (node_marks[node_id] != 2) { for (const auto& edge: nodes_[node_id]->in_edges_) { if (node_marks[edge->begin_node_id_] != 2) { nodes_to_visit.push(edge->begin_node_id_); valid = false; } } if (check_aligned_nodes[node_id]) { for (const auto& aid: nodes_[node_id]->aligned_nodes_ids_) { if (node_marks[aid] != 2) { nodes_to_visit.push(aid); check_aligned_nodes[aid] = false; valid = false; } } } assert((valid || node_marks[node_id] != 1) && "Graph is not a DAG!"); if (valid) { node_marks[node_id] = 2; if (check_aligned_nodes[node_id]) { rank_to_node_id_.push_back(node_id); for (const auto& aid: nodes_[node_id]->aligned_nodes_ids_) { rank_to_node_id_.emplace_back(aid); } } } else { node_marks[node_id] = 1; } } if (valid) { nodes_to_visit.pop(); } } } assert(is_topologically_sorted() == true); } bool Graph::is_topologically_sorted() const { assert(nodes_.size() == rank_to_node_id_.size()); std::vector visited_nodes(nodes_.size(), false); for (std::uint32_t node_id: rank_to_node_id_) { for (const auto& edge: nodes_[node_id]->in_edges_) { if (visited_nodes[edge->begin_node_id_] == false) { return false; } } visited_nodes[node_id] = true; } return true; } std::uint32_t Graph::initialize_multiple_sequence_alignment( std::vector& dst) const { dst.resize(nodes_.size(), 0); std::uint32_t msa_id = 0; for (std::uint32_t i = 0; i < nodes_.size(); ++i) { std::uint32_t node_id = rank_to_node_id_[i]; dst[node_id] = msa_id; for (std::uint32_t j = 0; j < nodes_[node_id]->aligned_nodes_ids_.size(); ++j) { 
dst[rank_to_node_id_[++i]] = msa_id; } ++msa_id; } return msa_id; } void Graph::generate_multiple_sequence_alignment(std::vector& dst, bool include_consensus) { // assign msa id to each node std::vector node_id_to_msa_id; auto msa_length = initialize_multiple_sequence_alignment(node_id_to_msa_id); // extract sequences from graph and create msa strings (add indels(-) where // necessary) for (std::uint32_t i = 0; i < num_sequences_; ++i) { std::string alignment_str(msa_length, '-'); std::uint32_t node_id = sequences_begin_nodes_ids_[i]; while (true) { alignment_str[node_id_to_msa_id[node_id]] = decoder_[nodes_[node_id]->code_]; if (!nodes_[node_id]->successor(node_id, i)) { break; } } dst.emplace_back(alignment_str); } if (include_consensus) { // do the same for consensus sequence traverse_heaviest_bundle(); std::string alignment_str(msa_length, '-'); for (const auto& node_id: consensus_) { alignment_str[node_id_to_msa_id[node_id]] = decoder_[nodes_[node_id]->code_]; } dst.emplace_back(alignment_str); } } std::string Graph::generate_consensus() { traverse_heaviest_bundle(); std::string consensus_str = ""; for (const auto& node_id: consensus_) { consensus_str += decoder_[nodes_[node_id]->code_]; } return consensus_str; } std::string Graph::generate_consensus(std::vector& dst, bool verbose) { auto consensus_str = generate_consensus(); dst.clear(); if (verbose == false) { for (const auto& node_id: consensus_) { std::uint32_t total_coverage = nodes_[node_id]->coverage(); for (const auto& aid: nodes_[node_id]->aligned_nodes_ids_) { total_coverage += nodes_[aid]->coverage(); } dst.emplace_back(total_coverage); } } else { dst.resize((num_codes_ + 1) * consensus_.size(), 0); std::vector node_id_to_msa_id; initialize_multiple_sequence_alignment(node_id_to_msa_id); for (std::uint32_t i = 0; i < num_sequences_; ++i) { auto node_id = sequences_begin_nodes_ids_[i]; bool count_indels = false; std::uint32_t c = 0, l; while (true) { for (; c < consensus_.size() && 
node_id_to_msa_id[consensus_[c]] < node_id_to_msa_id[node_id]; ++c); if (c >= consensus_.size()) { break; } if (node_id_to_msa_id[consensus_[c]] == node_id_to_msa_id[node_id]) { if (count_indels) { for (std::uint32_t j = l + 1; j < c; ++j) { ++dst[num_codes_ * consensus_.size() + j]; } } count_indels = true; l = c; ++dst[nodes_[node_id]->code_ * consensus_.size() + c]; } if (!nodes_[node_id]->successor(node_id, i)) { break; } } } } return consensus_str; } void Graph::traverse_heaviest_bundle() { std::vector predecessors(nodes_.size(), -1); std::vector scores(nodes_.size(), -1); std::uint32_t max_score_id = 0; for (const auto& node_id: rank_to_node_id_) { for (const auto& edge: nodes_[node_id]->in_edges_) { if (scores[node_id] < edge->total_weight_ || (scores[node_id] == edge->total_weight_ && scores[predecessors[node_id]] <= scores[edge->begin_node_id_])) { scores[node_id] = edge->total_weight_; predecessors[node_id] = edge->begin_node_id_; } } if (predecessors[node_id] != -1) { scores[node_id] += scores[predecessors[node_id]]; } if (scores[max_score_id] < scores[node_id]) { max_score_id = node_id; } } if (nodes_[max_score_id]->out_edges_.size() != 0) { std::vector node_id_to_rank(nodes_.size(), 0); for (std::uint32_t i = 0; i < nodes_.size(); ++i) { node_id_to_rank[rank_to_node_id_[i]] = i; } while (nodes_[max_score_id]->out_edges_.size() != 0) { max_score_id = branch_completion(scores, predecessors, node_id_to_rank[max_score_id]); } } // traceback consensus_.clear(); while (predecessors[max_score_id] != -1) { consensus_.emplace_back(max_score_id); max_score_id = predecessors[max_score_id]; } consensus_.emplace_back(max_score_id); std::reverse(consensus_.begin(), consensus_.end()); } std::uint32_t Graph::branch_completion(std::vector& scores, std::vector& predecessors, std::uint32_t rank) { std::uint32_t node_id = rank_to_node_id_[rank]; for (const auto& edge: nodes_[node_id]->out_edges_) { for (const auto& o_edge: nodes_[edge->end_node_id_]->in_edges_) { if 
(o_edge->begin_node_id_ != node_id) { scores[o_edge->begin_node_id_] = -1; } } } std::int64_t max_score = 0; std::uint32_t max_score_id = 0; for (std::uint32_t i = rank + 1; i < rank_to_node_id_.size(); ++i) { std::uint32_t node_id = rank_to_node_id_[i]; scores[node_id] = -1; predecessors[node_id] = -1; for (const auto& edge: nodes_[node_id]->in_edges_) { if (scores[edge->begin_node_id_] == -1) { continue; } if (scores[node_id] < edge->total_weight_ || (scores[node_id] == edge->total_weight_ && scores[predecessors[node_id]] <= scores[edge->begin_node_id_])) { scores[node_id] = edge->total_weight_; predecessors[node_id] = edge->begin_node_id_; } } if (predecessors[node_id] != -1) { scores[node_id] += scores[predecessors[node_id]]; } if (max_score < scores[node_id]) { max_score = scores[node_id]; max_score_id = node_id; } } return max_score_id; } // backtracing from right to left! void Graph::extract_subgraph_nodes(std::vector& dst, std::uint32_t begin_node_id, std::uint32_t end_node_id) const { dst.resize(nodes_.size(), false); std::stack nodes_to_visit; nodes_to_visit.push(begin_node_id); while (nodes_to_visit.size() != 0) { std::uint32_t node_id = nodes_to_visit.top(); nodes_to_visit.pop(); if (dst[node_id] == false && node_id >= end_node_id) { for (const auto& edge: nodes_[node_id]->in_edges_) { nodes_to_visit.push(edge->begin_node_id_); } for (const auto& aid: nodes_[node_id]->aligned_nodes_ids_) { nodes_to_visit.push(aid); } dst[node_id] = true; } } } std::unique_ptr Graph::subgraph(std::uint32_t begin_node_id, std::uint32_t end_node_id, std::vector& subgraph_to_graph_mapping) const { std::vector is_subgraph_node; extract_subgraph_nodes(is_subgraph_node, end_node_id, begin_node_id); // init subgraph auto subgraph = std::unique_ptr(new Graph()); subgraph->num_sequences_ = num_sequences_; subgraph->num_codes_ = num_codes_; subgraph->coder_ = std::vector(coder_); subgraph->decoder_ = std::vector(decoder_); // create mapping from subgraph to graph and vice versa 
and add nodes to // subgraph subgraph_to_graph_mapping.resize(nodes_.size(), -1); std::vector graph_to_subgraph_mapping(nodes_.size(), -1); for (std::uint32_t i = 0; i < is_subgraph_node.size(); ++i) { if (is_subgraph_node[i] == false) { continue; } std::uint32_t subgraph_id = subgraph->add_node(nodes_[i]->code_); graph_to_subgraph_mapping[i] = subgraph_id; subgraph_to_graph_mapping[subgraph_id] = i; } // add edges and aligned nodes for (std::uint32_t i = 0; i < is_subgraph_node.size(); ++i) { if (is_subgraph_node[i] == false) { continue; } std::uint32_t subgraph_id = graph_to_subgraph_mapping[i]; for (const auto& edge: nodes_[i]->in_edges_) { if (graph_to_subgraph_mapping[edge->begin_node_id_] == -1) { continue; } subgraph->add_edge(graph_to_subgraph_mapping[edge->begin_node_id_], subgraph_id, edge->total_weight_); } for (const auto& aid: nodes_[i]->aligned_nodes_ids_) { if (graph_to_subgraph_mapping[aid] == -1) { continue; } subgraph->nodes_[subgraph_id]->aligned_nodes_ids_.emplace_back( graph_to_subgraph_mapping[aid]); } } subgraph->topological_sort(); return subgraph; } void Graph::update_alignment(Alignment& alignment, const std::vector& subgraph_to_graph_mapping) const { for (std::uint32_t i = 0; i < alignment.size(); ++i) { if (alignment[i].first != -1) { alignment[i].first = subgraph_to_graph_mapping[alignment[i].first]; } } } void Graph::print_dot(const std::string& path) const { if (path.empty()) { return; } std::ofstream out(path); std::vector in_consensus(nodes_.size(), -1); std::int32_t rank = 0; for (const auto& id: consensus_) { in_consensus[id] = rank++; } out << "digraph " << num_sequences_ << " {" << std::endl; out << " graph [rankdir=LR]" << std::endl; for (std::uint32_t i = 0; i < nodes_.size(); ++i) { out << " " << i << " [label = \"" << i << " - "; out << static_cast(decoder_[nodes_[i]->code_]) << "\""; if (in_consensus[i] != -1) { out << ", style=filled, fillcolor=goldenrod1"; } out << "]" << std::endl; for (const auto& edge: 
nodes_[i]->out_edges_) { out << " " << i << " -> " << edge->end_node_id_; out << " [label = \"" << edge->total_weight_ << "\""; if (in_consensus[i] + 1 == in_consensus[edge->end_node_id_]) { out << ", color=goldenrod1"; } out << "]" << std::endl; } for (const auto& aid: nodes_[i]->aligned_nodes_ids_) { if (aid > i) { out << " " << i << " -> " << aid; out << " [style = dotted, arrowhead = none]" << std::endl; } } } out << "}" << std::endl; out.close(); } void Graph::clear() { std::fill(coder_.begin(), coder_.end(), -1); std::fill(decoder_.begin(), decoder_.end(), -1); nodes_.clear(); rank_to_node_id_.clear(); sequences_begin_nodes_ids_.clear(); consensus_.clear(); } } spoa-3.0.1/src/main.cpp000066400000000000000000000147301351220300700146720ustar00rootroot00000000000000#include #include #include #include #include #include "sequence.hpp" #include "spoa/spoa.hpp" #include "bioparser/bioparser.hpp" static const std::string version = "v3.0.1"; static struct option options[] = { {"algorithm", required_argument, nullptr, 'l'}, {"result", required_argument, nullptr, 'r'}, {"dot", required_argument, nullptr, 'd'}, {"version", no_argument, nullptr, 'v'}, {"help", no_argument, nullptr, 'h'}, {nullptr, 0, nullptr, 0} }; void help(); int main(int argc, char** argv) { std::int8_t m = 5; std::int8_t n = -4; std::int8_t g = -8; std::int8_t e = -6; std::int8_t q = -10; std::int8_t c = -4; std::uint8_t algorithm = 0; std::uint8_t result = 0; std::string dot_path = ""; char opt; while ((opt = getopt_long(argc, argv, "m:n:g:e:q:c:l:r:d:h", options, nullptr)) != -1) { switch (opt) { case 'm': m = atoi(optarg); break; case 'n': n = atoi(optarg); break; case 'g': g = atoi(optarg); break; case 'e': e = atoi(optarg); break; case 'q': q = atoi(optarg); break; case 'c': c = atoi(optarg); break; case 'l': algorithm = atoi(optarg); break; case 'r': result = atoi(optarg); break; case 'd': dot_path = optarg; break; case 'v': std::cout << version << std::endl; return 0; case 'h': help(); return 
0; default: return 1; } } if (optind >= argc) { std::cerr << "[spoa::] error: missing input file!" << std::endl; help(); return 1; } std::string sequences_path = argv[optind]; auto is_suffix = [](const std::string& src, const std::string& suffix) -> bool { if (src.size() < suffix.size()) { return false; } return src.compare(src.size() - suffix.size(), suffix.size(), suffix) == 0; }; std::unique_ptr> sparser = nullptr; if (is_suffix(sequences_path, ".fasta") || is_suffix(sequences_path, ".fa") || is_suffix(sequences_path, ".fasta.gz") || is_suffix(sequences_path, ".fa.gz")) { sparser = bioparser::createParser( sequences_path); } else if (is_suffix(sequences_path, ".fastq") || is_suffix(sequences_path, ".fq") || is_suffix(sequences_path, ".fastq.gz") || is_suffix(sequences_path, ".fq.gz")) { sparser = bioparser::createParser( sequences_path); } else { std::cerr << "[spoa::] error: file " << sequences_path << " has unsupported format extension (valid extensions: .fasta, " ".fasta.gz, .fa, .fa.gz, .fastq, .fastq.gz, .fq, .fq.gz)!" 
<< std::endl; return 1; } std::unique_ptr alignment_engine; try { alignment_engine = spoa::createAlignmentEngine( static_cast(algorithm), m, n, g, e, q, c); } catch(std::invalid_argument& exception) { std::cerr << exception.what() << std::endl; return 1; } auto graph = spoa::createGraph(); std::vector> sequences; sparser->parse(sequences, -1); std::size_t max_sequence_size = 0; for (const auto& it: sequences) { max_sequence_size = std::max(max_sequence_size, it->data().size()); } alignment_engine->prealloc(max_sequence_size, 4); for (const auto& it: sequences) { auto alignment = alignment_engine->align(it->data(), graph); try { graph->add_alignment(alignment, it->data(), it->quality()); } catch(std::invalid_argument& exception) { std::cerr << exception.what() << std::endl; return 1; } } if (result == 0 || result == 2) { std::string consensus = graph->generate_consensus(); std::cout << "Consensus (" << consensus.size() << ")" << std::endl; std::cout << consensus << std::endl; } if (result == 1 || result == 2) { std::vector msa; graph->generate_multiple_sequence_alignment(msa); std::cout << "Multiple sequence alignment" << std::endl; for (const auto& it: msa) { std::cout << it << std::endl; } } graph->print_dot(dot_path); return 0; } void help() { std::cout << "usage: spoa [options ...] 
\n" "\n" " \n" " input file in FASTA/FASTQ format (can be compressed with gzip)\n" " containing sequences\n" "\n" " options:\n" " -m \n" " default: 5\n" " score for matching bases\n" " -n \n" " default: -4\n" " score for mismatching bases\n" " -g \n" " default: -8\n" " gap opening penalty (must be non-positive)\n" " -e \n" " default: -6\n" " gap extension penalty (must be non-positive)\n" " -q \n" " default: -10\n" " gap opening penalty of the second affine function\n" " (must be non-positive)\n" " -c \n" " default: -4\n" " gap extension penalty of the second affine function\n" " (must be non-positive)\n" " -l, --algorithm \n" " default: 0\n" " alignment mode:\n" " 0 - local (Smith-Waterman)\n" " 1 - global (Needleman-Wunsch)\n" " 2 - semi-global\n" " -r, --result \n" " default: 0\n" " result mode:\n" " 0 - consensus\n" " 1 - multiple sequence alignment\n" " 2 - 0 & 1\n" " -d, --dot \n" " output file for the final POA graph in DOT format\n" " --version\n" " prints the version number\n" " -h, --help\n" " prints the usage\n" "\n" " gap mode:\n" " linear if g >= e\n" " affine if g <= q or e >= c\n" " convex otherwise (default)\n"; } spoa-3.0.1/src/sequence.cpp000066400000000000000000000011121351220300700155440ustar00rootroot00000000000000/*! * @file sequence.cpp * * @brief Sequence class source file */ #include "sequence.hpp" namespace spoa { Sequence::Sequence(const char* name, std::uint32_t name_size, const char* data, std::uint32_t data_size) : name_(name, name_size), data_(data, data_size), quality_( data_size, 34) { } Sequence::Sequence(const char* name, std::uint32_t name_size, const char* data, std::uint32_t data_size, const char* quality, std::uint32_t quality_size) : name_(name, name_size), data_(data, data_size), quality_(quality, quality_size) { } } spoa-3.0.1/src/sequence.hpp000066400000000000000000000021161351220300700155560ustar00rootroot00000000000000/*! 
* @file sequence.hpp * * @brief Sequence class header file */ #pragma once #include #include #include #include namespace bioparser { template class FastaParser; template class FastqParser; } namespace spoa { class Sequence { public: ~Sequence() = default; const std::string& name() const { return name_; } const std::string& data() const { return data_; } const std::string& quality() const { return quality_; } friend bioparser::FastaParser; friend bioparser::FastqParser; private: Sequence(const char* name, std::uint32_t name_size, const char* data, std::uint32_t data_size); Sequence(const char* name, std::uint32_t name_size, const char* data, std::uint32_t data_size, const char* quality, std::uint32_t quality_size); Sequence(const Sequence&) = delete; const Sequence& operator=(const Sequence&) = delete; std::string name_; std::string data_; std::string quality_; }; } spoa-3.0.1/src/simd_alignment_engine.cpp000066400000000000000000002226221351220300700202660ustar00rootroot00000000000000/*! 
* @file simd_alignment_engine.cpp * * @brief SimdAlignmentEngine class source file */ #include #include #include extern "C" { #include // AVX2 and lower } #include "spoa/graph.hpp" #include "simd_alignment_engine.hpp" namespace spoa { // Taken from https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=216149 inline void* align(std::size_t __align, std::size_t __size, void*& __ptr, std::size_t& __space) noexcept { const auto __intptr = reinterpret_cast(__ptr); const auto __aligned = (__intptr - 1u + __align) & -__align; const auto __diff = __aligned - __intptr; if ((__size + __diff) > __space) return nullptr; else { __space -= __diff; return __ptr = reinterpret_cast(__aligned); } } template T* allocateAlignedMemory(T** storage, std::uint32_t size, std::uint32_t alignment) { *storage = new T[size + alignment - 1]; void* ptr = static_cast(*storage); std::size_t storage_size = (size + alignment - 1) * sizeof(T); return static_cast(align(alignment, size * sizeof(T), ptr, storage_size)); } template struct InstructionSet; #if defined(__AVX2__) constexpr std::uint32_t kRegisterSize = 256; using __mxxxi = __m256i; inline __mxxxi _mmxxx_load_si(__mxxxi const* mem_addr) { return _mm256_load_si256(mem_addr); } inline void _mmxxx_store_si(__mxxxi* mem_addr, const __mxxxi& a) { _mm256_store_si256(mem_addr, a); } inline __mxxxi _mmxxx_or_si(const __mxxxi& a, const __mxxxi& b) { return _mm256_or_si256(a, b); } #define _mmxxx_slli_si(a, n) n < 16 ? 
\ _mm256_alignr_epi8(a, _mm256_permute2x128_si256(a, a, \ _MM_SHUFFLE(0, 0, 2, 0)), 16 - n) : \ _mm256_permute2x128_si256(a, a, _MM_SHUFFLE(0, 0, 2, 0)) #define _mmxxx_srli_si(a, n) \ _mm256_srli_si256(_mm256_permute2x128_si256(a, a, \ _MM_SHUFFLE(2, 0, 0, 1)), n - 16) template<> struct InstructionSet { using type = std::int16_t; static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type)); static constexpr std::uint32_t kLogNumVar = 4; static constexpr std::uint32_t kLSS = 2; // Left Shift Size static constexpr std::uint32_t kRSS = 30; // Right Shift Size static inline __mxxxi _mmxxx_add_epi(const __mxxxi& a, const __mxxxi& b) { return _mm256_add_epi16(a, b); } static inline __mxxxi _mmxxx_sub_epi(const __mxxxi& a, const __mxxxi& b) { return _mm256_sub_epi16(a, b); } static inline __mxxxi _mmxxx_min_epi(const __mxxxi& a, const __mxxxi& b) { return _mm256_min_epi16(a, b); } static inline __mxxxi _mmxxx_max_epi(const __mxxxi& a, const __mxxxi& b) { return _mm256_max_epi16(a, b); } static inline __mxxxi _mmxxx_set1_epi(type a) { return _mm256_set1_epi16(a); } static inline void _mmxxx_prefix_max(__mxxxi& a, const __mxxxi* masks, const __mxxxi* penalties) { a = _mmxxx_max_epi(a, _mmxxx_or_si(masks[0], _mmxxx_slli_si( _mmxxx_add_epi(a, penalties[0]), 2))); a = _mmxxx_max_epi(a, _mmxxx_or_si(masks[1], _mmxxx_slli_si( _mmxxx_add_epi(a, penalties[1]), 4))); a = _mmxxx_max_epi(a, _mmxxx_or_si(masks[2], _mmxxx_slli_si( _mmxxx_add_epi(a, penalties[2]), 8))); a = _mmxxx_max_epi(a, _mmxxx_or_si(masks[3], _mmxxx_slli_si( _mmxxx_add_epi(a, penalties[3]), 16))); } }; template<> struct InstructionSet { using type = std::int32_t; static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type)); static constexpr std::uint32_t kLogNumVar = 3; static constexpr std::uint32_t kLSS = 4; static constexpr std::uint32_t kRSS = 28; static inline __mxxxi _mmxxx_add_epi(const __mxxxi& a, const __mxxxi& b) { return _mm256_add_epi32(a, b); } static inline __mxxxi 
_mmxxx_sub_epi(const __mxxxi& a, const __mxxxi& b) { return _mm256_sub_epi32(a, b); } static inline __mxxxi _mmxxx_min_epi(const __mxxxi& a, const __mxxxi& b) { return _mm256_min_epi32(a, b); } static inline __mxxxi _mmxxx_max_epi(const __mxxxi& a, const __mxxxi& b) { return _mm256_max_epi32(a, b); } static inline __mxxxi _mmxxx_set1_epi(type a) { return _mm256_set1_epi32(a); } static inline void _mmxxx_prefix_max(__mxxxi& a, const __mxxxi* masks, const __mxxxi* penalties) { a = _mmxxx_max_epi(a, _mmxxx_or_si(masks[0], _mmxxx_slli_si( _mmxxx_add_epi(a, penalties[0]), 4))); a = _mmxxx_max_epi(a, _mmxxx_or_si(masks[1], _mmxxx_slli_si( _mmxxx_add_epi(a, penalties[1]), 8))); a = _mmxxx_max_epi(a, _mmxxx_or_si(masks[2], _mmxxx_slli_si( _mmxxx_add_epi(a, penalties[2]), 16))); } }; #elif defined(__SSE4_1__) constexpr std::uint32_t kRegisterSize = 128; using __mxxxi = __m128i; inline __mxxxi _mmxxx_load_si(__mxxxi const* mem_addr) { return _mm_load_si128(mem_addr); } inline void _mmxxx_store_si(__mxxxi* mem_addr, const __mxxxi& a) { _mm_store_si128(mem_addr, a); } inline __mxxxi _mmxxx_or_si(const __mxxxi& a, const __mxxxi& b) { return _mm_or_si128(a, b); } #define _mmxxx_slli_si(a, n) \ _mm_slli_si128(a, n) #define _mmxxx_srli_si(a, n) \ _mm_srli_si128(a, n) template<> struct InstructionSet { using type = std::int16_t; static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type)); static constexpr std::uint32_t kLogNumVar = 3; static constexpr std::uint32_t kLSS = 2; static constexpr std::uint32_t kRSS = 14; static inline __mxxxi _mmxxx_add_epi(const __mxxxi& a, const __mxxxi& b) { return _mm_add_epi16(a, b); } static inline __mxxxi _mmxxx_sub_epi(const __mxxxi& a, const __mxxxi& b) { return _mm_sub_epi16(a, b); } static inline __mxxxi _mmxxx_min_epi(const __mxxxi& a, const __mxxxi& b) { return _mm_min_epi16(a, b); } static inline __mxxxi _mmxxx_max_epi(const __mxxxi& a, const __mxxxi& b) { return _mm_max_epi16(a, b); } static inline __mxxxi 
_mmxxx_set1_epi(type a) { return _mm_set1_epi16(a); } static inline void _mmxxx_prefix_max(__mxxxi& a, const __mxxxi* masks, const __mxxxi* penalties) { a = _mmxxx_max_epi(a, _mmxxx_or_si(masks[0], _mmxxx_slli_si( _mmxxx_add_epi(a, penalties[0]), 2))); a = _mmxxx_max_epi(a, _mmxxx_or_si(masks[1], _mmxxx_slli_si( _mmxxx_add_epi(a, penalties[1]), 4))); a = _mmxxx_max_epi(a, _mmxxx_or_si(masks[2], _mmxxx_slli_si( _mmxxx_add_epi(a, penalties[2]), 8))); } }; template<> struct InstructionSet { using type = std::int32_t; static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type)); static constexpr std::uint32_t kLogNumVar = 2; static constexpr std::uint32_t kLSS = 4; static constexpr std::uint32_t kRSS = 12; static inline __mxxxi _mmxxx_add_epi(const __mxxxi& a, const __mxxxi& b) { return _mm_add_epi32(a, b); } static inline __mxxxi _mmxxx_sub_epi(const __mxxxi& a, const __mxxxi& b) { return _mm_sub_epi32(a, b); } static inline __mxxxi _mmxxx_min_epi(const __mxxxi& a, const __mxxxi& b) { return _mm_min_epi32(a, b); } static inline __mxxxi _mmxxx_max_epi(const __mxxxi& a, const __mxxxi& b) { return _mm_max_epi32(a, b); } static inline __mxxxi _mmxxx_set1_epi(type a) { return _mm_set1_epi32(a); } static inline void _mmxxx_prefix_max(__mxxxi& a, const __mxxxi* masks, const __mxxxi* penalties) { a = _mmxxx_max_epi(a, _mmxxx_or_si(masks[0], _mmxxx_slli_si( _mmxxx_add_epi(a, penalties[0]), 4))); a = _mmxxx_max_epi(a, _mmxxx_or_si(masks[1], _mmxxx_slli_si( _mmxxx_add_epi(a, penalties[1]), 8))); } }; #endif #if defined(__AVX2__) || defined(__SSE4_1__) template void _mmxxx_print(const __mxxxi& a) { __attribute__((aligned(kRegisterSize / 8))) typename T::type unpacked[T::kNumVar]; _mmxxx_store_si(reinterpret_cast<__mxxxi*>(unpacked), a); for (std::uint32_t i = 0; i < T::kNumVar; i++) { std::cout << unpacked[i] << " "; } } template typename T::type _mmxxx_max_value(const __mxxxi& a) { typename T::type max_score = 0; __attribute__((aligned(kRegisterSize / 8))) 
typename T::type unpacked[T::kNumVar]; _mmxxx_store_si(reinterpret_cast<__mxxxi*>(unpacked), a); for (std::uint32_t i = 0; i < T::kNumVar; i++) { max_score = std::max(max_score, unpacked[i]); } return max_score; } template typename T::type _mmxxx_value_at(const __mxxxi& a, std::uint32_t i) { __attribute__((aligned(kRegisterSize / 8))) typename T::type unpacked[T::kNumVar]; _mmxxx_store_si(reinterpret_cast<__mxxxi*>(unpacked), a); return unpacked[i]; } template std::int32_t _mmxxx_index_of(const __mxxxi* row, std::uint32_t row_width, typename T::type value) { for (std::uint32_t i = 0; i < row_width; ++i) { __attribute__((aligned(kRegisterSize / 8))) typename T::type unpacked[T::kNumVar]; _mmxxx_store_si(reinterpret_cast<__mxxxi*>(unpacked), row[i]); for (std::uint32_t j = 0; j < T::kNumVar; j++) { if (unpacked[j] == value) { return i * T::kNumVar + j; } } } return -1; } #endif std::unique_ptr createSimdAlignmentEngine(AlignmentType type, AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q, std::int8_t c) { #if defined(__AVX2__) || defined(__SSE4_1__) return std::unique_ptr(new SimdAlignmentEngine(type, subtype, m, n, g, e, q, c)); #else return nullptr; #endif } struct SimdAlignmentEngine::Implementation { #if defined(__AVX2__) || defined(__SSE4_1__) std::vector node_id_to_rank; std::unique_ptr<__mxxxi[]> sequence_profile_storage; std::uint32_t sequence_profile_size; __mxxxi* sequence_profile; std::vector first_column; std::unique_ptr<__mxxxi[]> M_storage; std::uint32_t M_size; __mxxxi* H; __mxxxi* F; __mxxxi* E; __mxxxi* O; __mxxxi* Q; std::unique_ptr<__mxxxi[]> masks_storage; std::uint32_t masks_size; __mxxxi* masks; std::unique_ptr<__mxxxi[]> penalties_storage; std::uint32_t penalties_size; __mxxxi* penalties; Implementation() : node_id_to_rank(), sequence_profile_storage(nullptr), sequence_profile_size(0), sequence_profile(nullptr), first_column(), M_storage(nullptr), M_size(0), H(nullptr), F(nullptr), E(nullptr), 
O(nullptr), Q(nullptr), masks_storage(nullptr), masks_size(0), masks(nullptr), penalties_storage(nullptr), penalties_size(0), penalties(nullptr) { } #endif }; SimdAlignmentEngine::SimdAlignmentEngine(AlignmentType type, AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q, std::int8_t c) : AlignmentEngine(type, subtype, m, n, g, e, q, c), pimpl_(new Implementation()) { } SimdAlignmentEngine::~SimdAlignmentEngine() { } void SimdAlignmentEngine::prealloc(std::uint32_t max_sequence_size, std::uint32_t alphabet_size) { #if defined(__AVX2__) || defined(__SSE4_1__) std::uint32_t longest_path = max_sequence_size * (alphabet_size + 1) + 1 + InstructionSet::kNumVar; std::uint32_t max_penalty = std::max(std::max(abs(m_), abs(n_)), std::max(abs(g_), abs(q_))); if (max_penalty * longest_path < std::numeric_limits::max()) { realloc((max_sequence_size / InstructionSet::kNumVar) + 1, alphabet_size * max_sequence_size, alphabet_size); } else { realloc((max_sequence_size / InstructionSet::kNumVar) + 1, alphabet_size * max_sequence_size, alphabet_size); } #endif } void SimdAlignmentEngine::realloc(std::uint32_t matrix_width, std::uint32_t matrix_height, std::uint32_t num_codes) { #if defined(__AVX2__) || defined(__SSE4_1__) if (pimpl_->node_id_to_rank.size() < matrix_height - 1) { pimpl_->node_id_to_rank.resize(matrix_height - 1, 0); } if (pimpl_->sequence_profile_size < num_codes * matrix_width) { __mxxxi* storage = nullptr; pimpl_->sequence_profile_size = num_codes * matrix_width; pimpl_->sequence_profile = allocateAlignedMemory(&storage, pimpl_->sequence_profile_size, kRegisterSize / 8); pimpl_->sequence_profile_storage.reset(); pimpl_->sequence_profile_storage = std::unique_ptr<__mxxxi[]>(storage); } if (subtype_ == AlignmentSubtype::kLinear) { if (pimpl_->first_column.size() < matrix_height) { pimpl_->first_column.resize(matrix_height, 0); } if (pimpl_->M_size < matrix_height * matrix_width) { __mxxxi* storage = nullptr; 
pimpl_->M_size = matrix_height * matrix_width; pimpl_->H = allocateAlignedMemory(&storage, pimpl_->M_size, kRegisterSize / 8); pimpl_->M_storage.reset(); pimpl_->M_storage = std::unique_ptr<__mxxxi[]>(storage); } } else if (subtype_ == AlignmentSubtype::kAffine) { if (pimpl_->first_column.size() < 2 * matrix_height) { pimpl_->first_column.resize(2 * matrix_height, 0); } if (pimpl_->M_size < 3 * matrix_height * matrix_width) { __mxxxi* storage = nullptr; pimpl_->M_size = 3 * matrix_height * matrix_width; pimpl_->H = allocateAlignedMemory(&storage, pimpl_->M_size, kRegisterSize / 8); pimpl_->F = pimpl_->H + matrix_height * matrix_width; pimpl_->E = pimpl_->F + matrix_height * matrix_width; pimpl_->M_storage.reset(); pimpl_->M_storage = std::unique_ptr<__mxxxi[]>(storage); } } else if (subtype_ == AlignmentSubtype::kConvex) { if (pimpl_->first_column.size() < 3 * matrix_height) { pimpl_->first_column.resize(3 * matrix_height, 0); } if (pimpl_->M_size < 5 * matrix_height * matrix_width) { __mxxxi* storage = nullptr; pimpl_->M_size = 5 * matrix_height * matrix_width; pimpl_->H = allocateAlignedMemory(&storage, pimpl_->M_size, kRegisterSize / 8); pimpl_->F = pimpl_->H + matrix_height * matrix_width; pimpl_->E = pimpl_->F + matrix_height * matrix_width; pimpl_->O = pimpl_->E + matrix_height * matrix_width; pimpl_->Q = pimpl_->O + matrix_height * matrix_width; pimpl_->M_storage.reset(); pimpl_->M_storage = std::unique_ptr<__mxxxi[]>(storage); } } if (pimpl_->masks_size < InstructionSet::kLogNumVar + 1) { __mxxxi* storage = nullptr; pimpl_->masks_size = InstructionSet::kLogNumVar + 1; pimpl_->masks = allocateAlignedMemory(&storage, pimpl_->masks_size, kRegisterSize / 8); pimpl_->masks_storage.reset(); pimpl_->masks_storage = std::unique_ptr<__mxxxi[]>(storage); } if (pimpl_->penalties_size < 2 * InstructionSet::kLogNumVar) { __mxxxi* storage = nullptr; pimpl_->penalties_size = 2 * InstructionSet::kLogNumVar; pimpl_->penalties = allocateAlignedMemory(&storage, 
pimpl_->penalties_size, kRegisterSize / 8); pimpl_->penalties_storage.reset(); pimpl_->penalties_storage = std::unique_ptr<__mxxxi[]>(storage); } #endif } template void SimdAlignmentEngine::initialize(const char* sequence, const std::unique_ptr& graph, std::uint32_t normal_matrix_width, std::uint32_t matrix_width, std::uint32_t matrix_height) noexcept { #if defined(__AVX2__) || defined(__SSE4_1__) std::int32_t padding_penatly = -1 * std::max(std::max(abs(m_), abs(n_)), std::max(abs(g_), abs(q_))); __attribute__((aligned(kRegisterSize / 8))) typename T::type unpacked[T::kNumVar] = {}; for (std::uint32_t i = 0; i < graph->num_codes(); ++i) { char c = graph->decoder(i); for (std::uint32_t j = 0; j < matrix_width; ++j) { for (std::uint32_t k = 0; k < T::kNumVar; ++k) { unpacked[k] = (j * T::kNumVar + k) < normal_matrix_width ? (c == sequence[j * T::kNumVar + k] ? m_ : n_) : padding_penatly; } pimpl_->sequence_profile[i * matrix_width + j] = _mmxxx_load_si(reinterpret_cast(unpacked)); } } const auto& rank_to_node_id = graph->rank_to_node_id(); for (std::uint32_t i = 0; i < rank_to_node_id.size(); ++i) { pimpl_->node_id_to_rank[rank_to_node_id[i]] = i; } typename T::type kNegativeInfinity = std::numeric_limits::min() + 1024; __mxxxi negative_infinities = T::_mmxxx_set1_epi(kNegativeInfinity); __mxxxi zeroes = T::_mmxxx_set1_epi(0); // initialize secondary matrices switch (subtype_) { case AlignmentSubtype::kConvex: for (std::uint32_t j = 0; j < matrix_width; ++j) { pimpl_->O[j] = negative_infinities; pimpl_->Q[j] = T::_mmxxx_set1_epi(q_ + j * T::kNumVar * c_); __mxxxi c = T::_mmxxx_set1_epi(c_); for (std::uint32_t k = 1; k < T::kNumVar; ++k) { c = _mmxxx_slli_si(c, T::kLSS); pimpl_->Q[j] = T::_mmxxx_add_epi(pimpl_->Q[j], c); } } pimpl_->first_column[2 * matrix_height] = 0; for (std::uint32_t i = 1; i < matrix_height; ++i) { const auto& edges = graph->nodes()[rank_to_node_id[i - 1]]->in_edges(); std::int32_t penalty = edges.empty() ? 
q_ - c_ : kNegativeInfinity; for (const auto& edge: edges) { std::uint32_t pred_i = pimpl_->node_id_to_rank[ edge->begin_node_id()] + 1; penalty = std::max(penalty, pimpl_->first_column[2 * matrix_height + pred_i]); } pimpl_->first_column[2 * matrix_height + i] = penalty + c_; } case AlignmentSubtype::kAffine: for (std::uint32_t j = 0; j < matrix_width; ++j) { pimpl_->F[j] = negative_infinities; pimpl_->E[j] = T::_mmxxx_set1_epi(g_ + j * T::kNumVar * e_); __mxxxi e = T::_mmxxx_set1_epi(e_); for (std::uint32_t k = 1; k < T::kNumVar; ++k) { e = _mmxxx_slli_si(e, T::kLSS); pimpl_->E[j] = T::_mmxxx_add_epi(pimpl_->E[j], e); } } pimpl_->first_column[matrix_height] = 0; for (std::uint32_t i = 1; i < matrix_height; ++i) { const auto& edges = graph->nodes()[rank_to_node_id[i - 1]]->in_edges(); std::int32_t penalty = edges.empty() ? g_ - e_ : kNegativeInfinity; for (const auto& edge: edges) { std::uint32_t pred_i = pimpl_->node_id_to_rank[ edge->begin_node_id()] + 1; penalty = std::max(penalty, pimpl_->first_column[matrix_height + pred_i]); } pimpl_->first_column[matrix_height + i] = penalty + e_; } case AlignmentSubtype::kLinear: default: break; } // initialize primary matrix switch (type_) { case AlignmentType::kSW: for (std::uint32_t j = 0; j < matrix_width; ++j) { pimpl_->H[j] = zeroes; } for (std::uint32_t i = 0; i < matrix_height; ++i) { pimpl_->first_column[i] = 0; } break; case AlignmentType::kNW: switch (subtype_) { case AlignmentSubtype::kConvex: for (std::uint32_t i = 0; i < matrix_height; ++i) { pimpl_->first_column[i] = std::max( pimpl_->first_column[matrix_height + i], pimpl_->first_column[2 * matrix_height + i]); } for (std::uint32_t j = 0; j < matrix_width; ++j) { pimpl_->H[j] = T::_mmxxx_max_epi(pimpl_->E[j], pimpl_->Q[j]); } break; case AlignmentSubtype::kAffine: for (std::uint32_t i = 0; i < matrix_height; ++i) { pimpl_->first_column[i] = pimpl_->first_column[matrix_height + i]; } for (std::uint32_t j = 0; j < matrix_width; ++j) { pimpl_->H[j] = 
pimpl_->E[j]; } break; case AlignmentSubtype::kLinear: pimpl_->first_column[0] = 0; for (std::uint32_t i = 1; i < matrix_height; ++i) { const auto& edges = graph->nodes()[rank_to_node_id[i - 1]]->in_edges(); std::int32_t penalty = edges.empty() ? 0 : kNegativeInfinity; for (const auto& edge: edges) { std::uint32_t pred_i = pimpl_->node_id_to_rank[ edge->begin_node_id()] + 1; penalty = std::max(penalty, pimpl_->first_column[pred_i]); } pimpl_->first_column[i] = penalty + g_; } for (std::uint32_t j = 0; j < matrix_width; ++j) { pimpl_->H[j] = T::_mmxxx_set1_epi(g_ + j * T::kNumVar * g_); __mxxxi g = T::_mmxxx_set1_epi(g_); for (std::uint32_t k = 1; k < T::kNumVar; ++k) { g = _mmxxx_slli_si(g, T::kLSS); pimpl_->H[j] = T::_mmxxx_add_epi(pimpl_->H[j], g); } } default: break; } break; case AlignmentType::kOV: switch (subtype_) { case AlignmentSubtype::kConvex: for (std::uint32_t j = 0; j < matrix_width; ++j) { pimpl_->H[j] = T::_mmxxx_max_epi(pimpl_->E[j], pimpl_->Q[j]); } break; case AlignmentSubtype::kAffine: for (std::uint32_t j = 0; j < matrix_width; ++j) { pimpl_->H[j] = pimpl_->E[j]; } break; case AlignmentSubtype::kLinear: for (std::uint32_t j = 0; j < matrix_width; ++j) { pimpl_->H[j] = T::_mmxxx_set1_epi(g_ + j * T::kNumVar * g_); __mxxxi g = T::_mmxxx_set1_epi(g_); for (std::uint32_t k = 1; k < T::kNumVar; ++k) { g = _mmxxx_slli_si(g, T::kLSS); pimpl_->H[j] = T::_mmxxx_add_epi(pimpl_->H[j], g); } } break; default: break; } for (std::uint32_t i = 0; i < matrix_height; ++i) { pimpl_->first_column[i] = 0; } break; default: break; } #endif } Alignment SimdAlignmentEngine::align(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept { if (graph->nodes().empty() || sequence_size == 0) { return Alignment(); } #if defined(__AVX2__) || defined(__SSE4_1__) std::uint32_t longest_path = graph->nodes().size() + 1 + sequence_size + InstructionSet::kNumVar; std::uint32_t max_penalty = std::max(std::max(abs(m_), abs(n_)), abs(g_)); if 
(max_penalty * longest_path < std::numeric_limits::max()) { if (subtype_ == AlignmentSubtype::kLinear) { return linear>(sequence, sequence_size, graph); } else if (subtype_ == AlignmentSubtype::kAffine) { return affine>(sequence, sequence_size, graph); } else if (subtype_ == AlignmentSubtype::kConvex) { return convex>(sequence, sequence_size, graph); } } else { if (subtype_ == AlignmentSubtype::kLinear) { return linear>(sequence, sequence_size, graph); } else if (subtype_ == AlignmentSubtype::kAffine) { return affine>(sequence, sequence_size, graph); } else if (subtype_ == AlignmentSubtype::kConvex) { return convex>(sequence, sequence_size, graph); } } return Alignment(); #else return Alignment(); #endif } template Alignment SimdAlignmentEngine::linear(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept { #if defined(__AVX2__) || defined(__SSE4_1__) std::uint32_t normal_matrix_width = sequence_size; std::uint32_t matrix_width = (sequence_size + (sequence_size % T::kNumVar == 0 ? 
0 : T::kNumVar - sequence_size % T::kNumVar)) / T::kNumVar; std::uint32_t matrix_height = graph->nodes().size() + 1; const auto& rank_to_node_id = graph->rank_to_node_id(); // realloc realloc(matrix_width, matrix_height, graph->num_codes()); // initialize initialize(sequence, graph, normal_matrix_width, matrix_width, matrix_height); typename T::type kNegativeInfinity = std::numeric_limits::min() + 1024; __attribute__((aligned(kRegisterSize / 8))) typename T::type unpacked[T::kNumVar] = {0}; for (std::uint32_t i = 0, j = 0; i < T::kNumVar && j < T::kLogNumVar; ++i) { unpacked[i] = kNegativeInfinity; if ((i & (i + 1)) == 0) { pimpl_->masks[j++] = _mmxxx_load_si(reinterpret_cast(unpacked)); } } pimpl_->masks[T::kLogNumVar] = _mmxxx_slli_si(T::_mmxxx_set1_epi( kNegativeInfinity), T::kLSS); pimpl_->penalties[0] = T::_mmxxx_set1_epi(g_); for (std::uint32_t i = 1; i < T::kLogNumVar; ++i) { pimpl_->penalties[i] = T::_mmxxx_add_epi(pimpl_->penalties[i - 1], pimpl_->penalties[i - 1]); } typename T::type max_score = type_ == AlignmentType::kSW ? 0 : kNegativeInfinity; std::int32_t max_i = -1; std::int32_t max_j = -1; std::uint32_t last_column_id = (normal_matrix_width - 1) % T::kNumVar; __mxxxi zeroes = T::_mmxxx_set1_epi(0); __mxxxi g = T::_mmxxx_set1_epi(g_); // alignment for (std::uint32_t node_id: rank_to_node_id) { const auto& node = graph->nodes()[node_id]; __mxxxi* char_profile = &(pimpl_->sequence_profile[node->code() * matrix_width]); std::uint32_t i = pimpl_->node_id_to_rank[node_id] + 1; std::uint32_t pred_i = node->in_edges().empty() ? 
0 : pimpl_->node_id_to_rank[node->in_edges()[0]->begin_node_id()] + 1; __mxxxi* H_row = &(pimpl_->H[i * matrix_width]); __mxxxi* H_pred_row = &(pimpl_->H[pred_i * matrix_width]); __mxxxi x = _mmxxx_srli_si(T::_mmxxx_set1_epi(pimpl_->first_column[pred_i]), T::kRSS); for (std::uint32_t j = 0; j < matrix_width; ++j) { // get diagonal __mxxxi t1 = _mmxxx_srli_si(H_pred_row[j], T::kRSS); H_row[j] = _mmxxx_or_si(_mmxxx_slli_si(H_pred_row[j], T::kLSS), x); x = t1; // update M H_row[j] = T::_mmxxx_max_epi(T::_mmxxx_add_epi(H_row[j], char_profile[j]), T::_mmxxx_add_epi(H_pred_row[j], g)); } // check other predecessors for (std::uint32_t p = 1; p < node->in_edges().size(); ++p) { pred_i = pimpl_->node_id_to_rank[node->in_edges()[p]->begin_node_id()] + 1; H_pred_row = &(pimpl_->H[pred_i * matrix_width]); x = _mmxxx_srli_si(T::_mmxxx_set1_epi(pimpl_->first_column[pred_i]), T::kRSS); for (std::uint32_t j = 0; j < matrix_width; ++j) { // get diagonal __mxxxi t1 = _mmxxx_srli_si(H_pred_row[j], T::kRSS); __mxxxi m = _mmxxx_or_si(_mmxxx_slli_si(H_pred_row[j], T::kLSS), x); x = t1; // updage M H_row[j] = T::_mmxxx_max_epi(H_row[j], T::_mmxxx_max_epi( T::_mmxxx_add_epi(m, char_profile[j]), T::_mmxxx_add_epi(H_pred_row[j], g))); } } __mxxxi score = T::_mmxxx_set1_epi(kNegativeInfinity); x = _mmxxx_srli_si(T::_mmxxx_add_epi(T::_mmxxx_set1_epi( pimpl_->first_column[i]), g), T::kRSS); for (std::uint32_t j = 0; j < matrix_width; ++j) { // add last element of previous vector into this one H_row[j] = T::_mmxxx_max_epi(H_row[j], _mmxxx_or_si(x, pimpl_->masks[T::kLogNumVar])); T::_mmxxx_prefix_max(H_row[j], pimpl_->masks, pimpl_->penalties); x = _mmxxx_srli_si(T::_mmxxx_add_epi(H_row[j], g), T::kRSS); if (type_ == AlignmentType::kSW) { H_row[j] = T::_mmxxx_max_epi(H_row[j], zeroes); } score = T::_mmxxx_max_epi(score, H_row[j]); } if (type_ == AlignmentType::kSW) { std::int32_t max_row_score = _mmxxx_max_value(score); if (max_score < max_row_score) { max_score = max_row_score; max_i = i; } } 
else if (type_ == AlignmentType::kOV) { if (node->out_edges().empty()) { std::int32_t max_row_score = _mmxxx_max_value(score); if (max_score < max_row_score) { max_score = max_row_score; max_i = i; } } } else if (type_ == AlignmentType::kNW) { if (node->out_edges().empty()) { std::int32_t max_row_score = _mmxxx_value_at( H_row[matrix_width - 1], last_column_id); if (max_score < max_row_score) { max_score = max_row_score; max_i = i; } } } } if (max_i == -1 && max_j == -1) { // no alignment found return Alignment(); } if (type_ == AlignmentType::kSW) { max_j = _mmxxx_index_of(&(pimpl_->H[max_i * matrix_width]), matrix_width, max_score); } else if (type_ == AlignmentType::kOV) { if (graph->nodes()[rank_to_node_id[max_i - 1]]->out_edges().empty()) { max_j = _mmxxx_index_of(&(pimpl_->H[max_i * matrix_width]), matrix_width, max_score); } else { max_j = normal_matrix_width - 1; } } else if (type_ == AlignmentType::kNW) { max_j = normal_matrix_width - 1; } // backtrack std::uint32_t max_num_predecessors = 1; for (std::uint32_t i = 0; i < (std::uint32_t) max_i; ++i) { max_num_predecessors = std::max(max_num_predecessors, (std::uint32_t) graph->nodes()[rank_to_node_id[i]]->in_edges().size()); } typename T::type* backtrack_storage = nullptr; typename T::type* H = allocateAlignedMemory(&backtrack_storage, 3 * T::kNumVar + 2 * T::kNumVar * max_num_predecessors, kRegisterSize / 8); typename T::type* H_pred = H + T::kNumVar; typename T::type* H_diag_pred = H_pred + T::kNumVar * max_num_predecessors; typename T::type* H_left_pred = H_diag_pred + T::kNumVar * max_num_predecessors; typename T::type* profile = H_left_pred + T::kNumVar; std::vector predecessors; std::int32_t i = max_i; std::int32_t j = max_j; std::int32_t prev_i = 0, prev_j = 0; std::uint32_t j_div = j / T::kNumVar; std::uint32_t j_mod = j % T::kNumVar; bool load_next_segment = true; Alignment alignment; do { // check stop condition if (j == -1 || i == 0) { break; } const auto& node = graph->nodes()[rank_to_node_id[i 
- 1]]; // load everything if (load_next_segment) { predecessors.clear(); // load current cells _mmxxx_store_si(reinterpret_cast<__mxxxi*>(H), pimpl_->H[i * matrix_width + j_div]); // load predecessors cells if (node->in_edges().empty()) { predecessors.emplace_back(0); _mmxxx_store_si(reinterpret_cast<__mxxxi*>(H_pred), pimpl_->H[j_div]); } else { std::uint32_t store_pos = 0; for (const auto& edge: node->in_edges()) { predecessors.emplace_back( pimpl_->node_id_to_rank[edge->begin_node_id()] + 1); _mmxxx_store_si( reinterpret_cast<__mxxxi*>(&H_pred[store_pos * T::kNumVar]), pimpl_->H[predecessors.back() * matrix_width + j_div]); ++store_pos; } } // load query profile cells _mmxxx_store_si(reinterpret_cast<__mxxxi*>(profile), pimpl_->sequence_profile[node->code() * matrix_width + j_div]); } // check stop condition if (type_ == AlignmentType::kSW && H[j_mod] == 0) { break; } if (j_mod == 0) { // border case if (j_div > 0) { _mmxxx_store_si(reinterpret_cast<__mxxxi*>(H_left_pred), pimpl_->H[i * matrix_width + j_div - 1]); for (std::uint32_t p = 0; p < predecessors.size(); ++p) { _mmxxx_store_si( reinterpret_cast<__mxxxi*>(&H_diag_pred[p * T::kNumVar]), pimpl_->H[predecessors[p] * matrix_width + (j_div - 1)]); } } else { H_left_pred[T::kNumVar - 1] = pimpl_->first_column[i]; for (std::uint32_t p = 0; p < predecessors.size(); ++p) { H_diag_pred[(p + 1) * T::kNumVar - 1] = pimpl_->first_column[predecessors[p]]; } } } // find best predecessor cell bool predecessor_found = false; if (i != 0) { for (std::uint32_t p = 0; p < predecessors.size(); ++p) { if ((j_mod == 0 && H[j_mod] == H_diag_pred[(p + 1) * T::kNumVar - 1] + profile[j_mod]) || (j_mod != 0 && H[j_mod] == H_pred[p * T::kNumVar + j_mod - 1] + profile[j_mod])) { prev_i = predecessors[p]; prev_j = j - 1; predecessor_found = true; break; } } } if (!predecessor_found && i != 0) { for (std::uint32_t p = 0; p < predecessors.size(); ++p) { if (H[j_mod] == H_pred[p * T::kNumVar + j_mod] + g_) { prev_i = predecessors[p]; 
prev_j = j; predecessor_found = true; break; } } } if (!predecessor_found) { if ((j_mod == 0 && H[j_mod] == H_left_pred[T::kNumVar - 1] + g_) || (j_mod != 0 && H[j_mod] == H[j_mod - 1] + g_)) { prev_i = i; prev_j = j - 1; predecessor_found = true; } } alignment.emplace_back(i == prev_i ? -1 : rank_to_node_id[i - 1], j == prev_j ? -1 : j); // update for next round load_next_segment = (i == prev_i ? false : true) || (j != prev_j && prev_j % T::kNumVar == T::kNumVar - 1 ? true : false); i = prev_i; j = prev_j; j_div = j / T::kNumVar; j_mod = j % T::kNumVar; } while (true); delete[] backtrack_storage; // update alignment for NW (backtrack stops on first row or column) if (type_ == AlignmentType::kNW) { while (i == 0 && j != -1) { alignment.emplace_back(-1, j); --j; } while (i != 0 && j == -1) { alignment.emplace_back(rank_to_node_id[i - 1], -1); const auto& node = graph->nodes()[rank_to_node_id[i - 1]]; if (node->in_edges().empty()) { i = 0; } else { for (const auto& edge: node->in_edges()) { std::uint32_t pred_i = pimpl_->node_id_to_rank[edge->begin_node_id()] + 1; if (pimpl_->first_column[i] == pimpl_->first_column[pred_i] + g_) { i = pred_i; break; } } } } } std::reverse(alignment.begin(), alignment.end()); return alignment; #else return Alignment(); #endif } template Alignment SimdAlignmentEngine::affine(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept { #if defined(__AVX2__) || defined(__SSE4_1__) std::uint32_t normal_matrix_width = sequence_size; std::uint32_t matrix_width = (sequence_size + (sequence_size % T::kNumVar == 0 ? 
0 : T::kNumVar - sequence_size % T::kNumVar)) / T::kNumVar; std::uint32_t matrix_height = graph->nodes().size() + 1; const auto& rank_to_node_id = graph->rank_to_node_id(); // realloc realloc(matrix_width, matrix_height, graph->num_codes()); // initialize initialize(sequence, graph, normal_matrix_width, matrix_width, matrix_height); typename T::type kNegativeInfinity = std::numeric_limits::min() + 1024; typename T::type max_score = type_ == AlignmentType::kSW ? 0 : kNegativeInfinity; std::int32_t max_i = -1; std::int32_t max_j = -1; std::uint32_t last_column_id = (normal_matrix_width - 1) % T::kNumVar; __mxxxi zeroes = T::_mmxxx_set1_epi(0); __mxxxi g = T::_mmxxx_set1_epi(g_ - e_); __mxxxi e = T::_mmxxx_set1_epi(e_); __attribute__((aligned(kRegisterSize / 8))) typename T::type unpacked[T::kNumVar] = {0}; for (std::uint32_t i = 0, j = 0; i < T::kNumVar && j < T::kLogNumVar; ++i) { unpacked[i] = kNegativeInfinity; if ((i & (i + 1)) == 0) { pimpl_->masks[j++] = _mmxxx_load_si(reinterpret_cast(unpacked)); } } pimpl_->masks[T::kLogNumVar] = _mmxxx_slli_si(T::_mmxxx_set1_epi(kNegativeInfinity), T::kLSS); pimpl_->penalties[0] = T::_mmxxx_set1_epi(e_); for (std::uint32_t i = 1; i < T::kLogNumVar; ++i) { pimpl_->penalties[i] = T::_mmxxx_add_epi(pimpl_->penalties[i - 1], pimpl_->penalties[i - 1]); } // alignment for (std::uint32_t node_id: rank_to_node_id) { const auto& node = graph->nodes()[node_id]; __mxxxi* char_profile = &(pimpl_->sequence_profile[node->code() * matrix_width]); std::uint32_t i = pimpl_->node_id_to_rank[node_id] + 1; __mxxxi* H_row = &(pimpl_->H[i * matrix_width]); __mxxxi* F_row = &(pimpl_->F[i * matrix_width]); std::uint32_t pred_i = node->in_edges().empty() ? 
0 : pimpl_->node_id_to_rank[node->in_edges()[0]->begin_node_id()] + 1; __mxxxi* H_pred_row = &(pimpl_->H[pred_i * matrix_width]); __mxxxi* F_pred_row = &(pimpl_->F[pred_i * matrix_width]); __mxxxi x = _mmxxx_srli_si(T::_mmxxx_set1_epi( pimpl_->first_column[pred_i]), T::kRSS); for (std::uint32_t j = 0; j < matrix_width; ++j) { // update F F_row[j] = T::_mmxxx_add_epi(T::_mmxxx_max_epi(T::_mmxxx_add_epi( H_pred_row[j], g), F_pred_row[j]), e); // update H H_row[j] = T::_mmxxx_add_epi(_mmxxx_or_si(_mmxxx_slli_si( H_pred_row[j], T::kLSS), x), char_profile[j]); x = _mmxxx_srli_si(H_pred_row[j], T::kRSS); } // check other predecessors for (std::uint32_t p = 1; p < node->in_edges().size(); ++p) { pred_i = pimpl_->node_id_to_rank[node->in_edges()[p]->begin_node_id()] + 1; H_pred_row = &(pimpl_->H[pred_i * matrix_width]); F_pred_row = &(pimpl_->F[pred_i * matrix_width]); x = _mmxxx_srli_si(T::_mmxxx_set1_epi( pimpl_->first_column[pred_i]), T::kRSS); for (std::uint32_t j = 0; j < matrix_width; ++j) { // update F F_row[j] = T::_mmxxx_max_epi(F_row[j], T::_mmxxx_add_epi( T::_mmxxx_max_epi(T::_mmxxx_add_epi(H_pred_row[j], g), F_pred_row[j]), e)); // update H H_row[j] = T::_mmxxx_max_epi(H_row[j], T::_mmxxx_add_epi( _mmxxx_or_si(_mmxxx_slli_si(H_pred_row[j], T::kLSS), x), char_profile[j])); x = _mmxxx_srli_si(H_pred_row[j], T::kRSS); } } __mxxxi* E_row = &(pimpl_->E[i * matrix_width]); __mxxxi score = zeroes; x = T::_mmxxx_set1_epi(pimpl_->first_column[i]); for (std::uint32_t j = 0; j < matrix_width; ++j) { H_row[j] = T::_mmxxx_max_epi(H_row[j], F_row[j]); E_row[j] = T::_mmxxx_add_epi(T::_mmxxx_add_epi(_mmxxx_or_si( _mmxxx_slli_si(H_row[j], T::kLSS), _mmxxx_srli_si(x, T::kRSS)), g), e); T::_mmxxx_prefix_max(E_row[j], pimpl_->masks, pimpl_->penalties); H_row[j] = T::_mmxxx_max_epi(H_row[j], E_row[j]); x = T::_mmxxx_max_epi(H_row[j], T::_mmxxx_sub_epi(E_row[j], g)); if (type_ == AlignmentType::kSW) { H_row[j] = T::_mmxxx_max_epi(H_row[j], zeroes); } score = T::_mmxxx_max_epi(score, 
H_row[j]); } if (type_ == AlignmentType::kSW) { std::int32_t max_row_score = _mmxxx_max_value(score); if (max_score < max_row_score) { max_score = max_row_score; max_i = i; } } else if (type_ == AlignmentType::kOV) { if (node->out_edges().empty()) { std::int32_t max_row_score = _mmxxx_max_value(score); if (max_score < max_row_score) { max_score = max_row_score; max_i = i; } } } else if (type_ == AlignmentType::kNW) { if (node->out_edges().empty()) { std::int32_t max_row_score = _mmxxx_value_at( H_row[matrix_width - 1], last_column_id); if (max_score < max_row_score) { max_score = max_row_score; max_i = i; } } } } if (max_i == -1 && max_j == -1) { // no alignment found return Alignment(); } if (type_ == AlignmentType::kSW) { max_j = _mmxxx_index_of(&(pimpl_->H[max_i * matrix_width]), matrix_width, max_score); } else if (type_ == AlignmentType::kOV) { if (graph->nodes()[rank_to_node_id[max_i - 1]]->out_edges().empty()) { max_j = _mmxxx_index_of(&(pimpl_->H[max_i * matrix_width]), matrix_width, max_score); } else { max_j = normal_matrix_width - 1; } } else if (type_ == AlignmentType::kNW) { max_j = normal_matrix_width - 1; } // backtrack std::uint32_t max_num_predecessors = 0; for (std::uint32_t i = 0; i < (std::uint32_t) max_i; ++i) { max_num_predecessors = std::max(max_num_predecessors, (std::uint32_t) graph->nodes()[rank_to_node_id[i]]->in_edges().size()); } typename T::type* backtrack_storage = nullptr; typename T::type* H = allocateAlignedMemory(&backtrack_storage, 6 * T::kNumVar + 3 * T::kNumVar * max_num_predecessors, kRegisterSize / 8); typename T::type* H_pred = H + T::kNumVar; typename T::type* H_diag_pred = H_pred + T::kNumVar * max_num_predecessors; typename T::type* H_left = H_diag_pred + T::kNumVar * max_num_predecessors; typename T::type* F = H_left + T::kNumVar; typename T::type* F_pred = F + T::kNumVar; typename T::type* E = F_pred + T::kNumVar * max_num_predecessors; typename T::type* E_left = E + T::kNumVar; typename T::type* profile = E_left + 
T::kNumVar; std::vector predecessors; std::int32_t i = max_i; std::int32_t j = max_j; std::int32_t prev_i = 0, prev_j = 0; std::uint32_t j_div = j / T::kNumVar; std::uint32_t j_mod = j % T::kNumVar; bool load_next_segment = true; Alignment alignment; do { // check stop condition if (j == -1 || i == 0) { break; } const auto& node = graph->nodes()[rank_to_node_id[i - 1]]; // load everything if (load_next_segment) { predecessors.clear(); // load current cells _mmxxx_store_si(reinterpret_cast<__mxxxi*>(H), pimpl_->H[i * matrix_width + j_div]); _mmxxx_store_si(reinterpret_cast<__mxxxi*>(E), pimpl_->E[i * matrix_width + j_div]); // load predecessors cells if (node->in_edges().empty()) { predecessors.emplace_back(0); _mmxxx_store_si(reinterpret_cast<__mxxxi*>(H_pred), pimpl_->H[j_div]); _mmxxx_store_si(reinterpret_cast<__mxxxi*>(F_pred), pimpl_->F[j_div]); } else { std::uint32_t store_pos = 0; for (const auto& edge: node->in_edges()) { predecessors.emplace_back( pimpl_->node_id_to_rank[edge->begin_node_id()] + 1); _mmxxx_store_si( reinterpret_cast<__mxxxi*>(&H_pred[store_pos * T::kNumVar]), pimpl_->H[predecessors.back() * matrix_width + j_div]); _mmxxx_store_si( reinterpret_cast<__mxxxi*>(&F_pred[store_pos * T::kNumVar]), pimpl_->F[predecessors.back() * matrix_width + j_div]); ++store_pos; } } // load query profile cells _mmxxx_store_si(reinterpret_cast<__mxxxi*>(profile), pimpl_->sequence_profile[node->code() * matrix_width + j_div]); } // check stop condition if (type_ == AlignmentType::kSW && H[j_mod] == 0) { break; } if (j_mod == 0) { // border case if (j_div > 0) { for (std::uint32_t p = 0; p < predecessors.size(); ++p) { _mmxxx_store_si( reinterpret_cast<__mxxxi*>(&H_diag_pred[p * T::kNumVar]), pimpl_->H[predecessors[p] * matrix_width + (j_div - 1)]); } _mmxxx_store_si(reinterpret_cast<__mxxxi*>(H_left), pimpl_->H[i * matrix_width + j_div - 1]); _mmxxx_store_si(reinterpret_cast<__mxxxi*>(E_left), pimpl_->E[i * matrix_width + j_div - 1]); } else { for (std::uint32_t 
p = 0; p < predecessors.size(); ++p) { H_diag_pred[(p + 1) * T::kNumVar - 1] = pimpl_->first_column[predecessors[p]]; } H_left[T::kNumVar - 1] = pimpl_->first_column[i]; E_left[T::kNumVar - 1] = pimpl_->first_column[i]; } } // find best predecessor cell bool predecessor_found = false, extend_left = false, extend_up = false; if (i != 0) { for (std::uint32_t p = 0; p < predecessors.size(); ++p) { if ((j_mod == 0 && H[j_mod] == H_diag_pred[(p + 1) * T::kNumVar - 1] + profile[j_mod]) || (j_mod != 0 && H[j_mod] == H_pred[p * T::kNumVar + j_mod - 1] + profile[j_mod])) { prev_i = predecessors[p]; prev_j = j - 1; predecessor_found = true; break; } } } if (!predecessor_found && i != 0) { for (std::uint32_t p = 0; p < predecessors.size(); ++p) { if ((extend_up = H[j_mod] == F_pred[p * T::kNumVar + j_mod] + e_) || H[j_mod] == H_pred[p * T::kNumVar + j_mod] + g_) { prev_i = predecessors[p]; prev_j = j; predecessor_found = true; break; } } } if (!predecessor_found) { if ((j_mod != 0 && ((extend_left = H[j_mod] == E[j_mod - 1] + e_) || H[j_mod] == H[j_mod - 1] + g_)) || (j_mod == 0 && ((extend_left = H[j_mod] == E_left[T::kNumVar - 1] + e_ ) || H[j_mod] == H_left[T::kNumVar - 1] + g_))) { prev_i = i; prev_j = j - 1; predecessor_found = true; } } alignment.emplace_back(i == prev_i ? -1 : rank_to_node_id[i - 1], j == prev_j ? -1 : j); // update for next round load_next_segment = (i == prev_i ? false : true) || (j != prev_j && prev_j % T::kNumVar == T::kNumVar - 1 ? 
true : false); i = prev_i; j = prev_j; j_div = j / T::kNumVar; j_mod = j % T::kNumVar; if (extend_left) { while (true) { // load if (j_mod == T::kNumVar - 1) { _mmxxx_store_si(reinterpret_cast<__mxxxi*>(E), pimpl_->E[i * matrix_width + j_div]); } else if (j_mod == 0) { // boarder case if (j_div > 0) { _mmxxx_store_si(reinterpret_cast<__mxxxi*>(E_left), pimpl_->E[i * matrix_width + j_div - 1]); } } alignment.emplace_back(-1, j); --j; j_div = j / T::kNumVar; j_mod = j % T::kNumVar; if (j == -1 || (j_mod != T::kNumVar - 1 && E[j_mod] + e_ != E[j_mod + 1]) || (j_mod == T::kNumVar - 1 && E_left[j_mod] + e_ != E[0])) { break; } } load_next_segment = true; } else if (extend_up) { while (true) { // load _mmxxx_store_si(reinterpret_cast<__mxxxi*>(F), pimpl_->F[i * matrix_width + j_div]); prev_i = 0; predecessors.clear(); std::uint32_t store_pos = 0; for (const auto& it: graph->nodes()[rank_to_node_id[i - 1]]->in_edges()) { predecessors.emplace_back( pimpl_->node_id_to_rank[it->begin_node_id()] + 1); _mmxxx_store_si( reinterpret_cast<__mxxxi*>(&H_pred[store_pos * T::kNumVar]), pimpl_->H[predecessors.back() * matrix_width + j_div]); _mmxxx_store_si( reinterpret_cast<__mxxxi*>(&F_pred[store_pos * T::kNumVar]), pimpl_->F[predecessors.back() * matrix_width + j_div]); ++store_pos; } bool stop = false; for (std::uint32_t p = 0; p < predecessors.size(); ++p) { if ((stop = F[j_mod] == H_pred[p * T::kNumVar + j_mod] + g_) || F[j_mod] == F_pred[p * T::kNumVar + j_mod] + e_) { prev_i = predecessors[p]; break; } } alignment.emplace_back(rank_to_node_id[i - 1], -1); i = prev_i; if (stop || i == 0) { break; } } } } while (true); delete[] backtrack_storage; // update alignment for NW (backtrack stops on first row or column) if (type_ == AlignmentType::kNW) { while (i == 0 && j != -1) { alignment.emplace_back(-1, j); --j; } while (i != 0 && j == -1) { alignment.emplace_back(rank_to_node_id[i - 1], -1); const auto& node = graph->nodes()[rank_to_node_id[i - 1]]; if (node->in_edges().empty()) 
{ i = 0; } else { for (const auto& edge: node->in_edges()) { std::uint32_t pred_i = pimpl_->node_id_to_rank[edge->begin_node_id()] + 1; if (pimpl_->first_column[i] == pimpl_->first_column[pred_i] + e_) { i = pred_i; break; } } } } } std::reverse(alignment.begin(), alignment.end()); return alignment; #else return Alignment(); #endif } template Alignment SimdAlignmentEngine::convex(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept { #if defined(__AVX2__) || defined(__SSE4_1__) std::uint32_t normal_matrix_width = sequence_size; std::uint32_t matrix_width = (sequence_size + (sequence_size % T::kNumVar == 0 ? 0 : T::kNumVar - sequence_size % T::kNumVar)) / T::kNumVar; std::uint32_t matrix_height = graph->nodes().size() + 1; const auto& rank_to_node_id = graph->rank_to_node_id(); // realloc realloc(matrix_width, matrix_height, graph->num_codes()); // initialize initialize(sequence, graph, normal_matrix_width, matrix_width, matrix_height); typename T::type kNegativeInfinity = std::numeric_limits::min() + 1024; typename T::type max_score = type_ == AlignmentType::kSW ? 
0 : kNegativeInfinity; std::int32_t max_i = -1; std::int32_t max_j = -1; std::uint32_t last_column_id = (normal_matrix_width - 1) % T::kNumVar; __mxxxi zeroes = T::_mmxxx_set1_epi(0); __mxxxi g = T::_mmxxx_set1_epi(g_ - e_); __mxxxi e = T::_mmxxx_set1_epi(e_); __mxxxi q = T::_mmxxx_set1_epi(q_ - c_); __mxxxi c = T::_mmxxx_set1_epi(c_); __attribute__((aligned(kRegisterSize / 8))) typename T::type unpacked[T::kNumVar] = {0}; for (std::uint32_t i = 0, j = 0; i < T::kNumVar && j < T::kLogNumVar; ++i) { unpacked[i] = kNegativeInfinity; if ((i & (i + 1)) == 0) { pimpl_->masks[j++] = _mmxxx_load_si(reinterpret_cast(unpacked)); } } pimpl_->masks[T::kLogNumVar] = _mmxxx_slli_si(T::_mmxxx_set1_epi(kNegativeInfinity), T::kLSS); pimpl_->penalties[0] = T::_mmxxx_set1_epi(e_); for (std::uint32_t i = 1; i < T::kLogNumVar; ++i) { pimpl_->penalties[i] = T::_mmxxx_add_epi(pimpl_->penalties[i - 1], pimpl_->penalties[i - 1]); } pimpl_->penalties[T::kLogNumVar] = T::_mmxxx_set1_epi(c_); for (std::uint32_t i = T::kLogNumVar + 1; i < 2 * T::kLogNumVar; ++i) { pimpl_->penalties[i] = T::_mmxxx_add_epi(pimpl_->penalties[i - 1], pimpl_->penalties[i - 1]); } // alignment for (std::uint32_t node_id: rank_to_node_id) { const auto& node = graph->nodes()[node_id]; __mxxxi* char_profile = &(pimpl_->sequence_profile[node->code() * matrix_width]); std::uint32_t i = pimpl_->node_id_to_rank[node_id] + 1; __mxxxi* H_row = &(pimpl_->H[i * matrix_width]); __mxxxi* F_row = &(pimpl_->F[i * matrix_width]); __mxxxi* O_row = &(pimpl_->O[i * matrix_width]); std::uint32_t pred_i = node->in_edges().empty() ? 
0 : pimpl_->node_id_to_rank[node->in_edges()[0]->begin_node_id()] + 1; __mxxxi* H_pred_row = &(pimpl_->H[pred_i * matrix_width]); __mxxxi* F_pred_row = &(pimpl_->F[pred_i * matrix_width]); __mxxxi* O_pred_row = &(pimpl_->O[pred_i * matrix_width]); __mxxxi x = _mmxxx_srli_si(T::_mmxxx_set1_epi( pimpl_->first_column[pred_i]), T::kRSS); for (std::uint32_t j = 0; j < matrix_width; ++j) { // update F F_row[j] = T::_mmxxx_add_epi(T::_mmxxx_max_epi(T::_mmxxx_add_epi( H_pred_row[j], g), F_pred_row[j]), e); // update O O_row[j] = T::_mmxxx_add_epi(T::_mmxxx_max_epi(T::_mmxxx_add_epi( H_pred_row[j], q), O_pred_row[j]), c); // update H H_row[j] = T::_mmxxx_add_epi(_mmxxx_or_si(_mmxxx_slli_si( H_pred_row[j], T::kLSS), x), char_profile[j]); x = _mmxxx_srli_si(H_pred_row[j], T::kRSS); } // check other predecessors for (std::uint32_t p = 1; p < node->in_edges().size(); ++p) { pred_i = pimpl_->node_id_to_rank[node->in_edges()[p]->begin_node_id()] + 1; H_pred_row = &(pimpl_->H[pred_i * matrix_width]); F_pred_row = &(pimpl_->F[pred_i * matrix_width]); O_pred_row = &(pimpl_->O[pred_i * matrix_width]); x = _mmxxx_srli_si(T::_mmxxx_set1_epi( pimpl_->first_column[pred_i]), T::kRSS); for (std::uint32_t j = 0; j < matrix_width; ++j) { // update F F_row[j] = T::_mmxxx_max_epi(F_row[j], T::_mmxxx_add_epi( T::_mmxxx_max_epi(T::_mmxxx_add_epi(H_pred_row[j], g), F_pred_row[j]), e)); // update O O_row[j] = T::_mmxxx_max_epi(O_row[j], T::_mmxxx_add_epi( T::_mmxxx_max_epi(T::_mmxxx_add_epi(H_pred_row[j], q), O_pred_row[j]), c)); // update H H_row[j] = T::_mmxxx_max_epi(H_row[j], T::_mmxxx_add_epi( _mmxxx_or_si(_mmxxx_slli_si(H_pred_row[j], T::kLSS), x), char_profile[j])); x = _mmxxx_srli_si(H_pred_row[j], T::kRSS); } } __mxxxi* E_row = &(pimpl_->E[i * matrix_width]); __mxxxi* Q_row = &(pimpl_->Q[i * matrix_width]); x = T::_mmxxx_set1_epi(pimpl_->first_column[i]); __mxxxi y = T::_mmxxx_set1_epi(pimpl_->first_column[i]); __mxxxi score = zeroes; for (std::uint32_t j = 0; j < matrix_width; ++j) { 
H_row[j] = T::_mmxxx_max_epi(H_row[j], T::_mmxxx_max_epi(F_row[j], O_row[j])); E_row[j] = T::_mmxxx_add_epi(T::_mmxxx_add_epi(_mmxxx_or_si( _mmxxx_slli_si(H_row[j], T::kLSS), _mmxxx_srli_si(x, T::kRSS)), g), e); T::_mmxxx_prefix_max(E_row[j], pimpl_->masks, pimpl_->penalties); Q_row[j] = T::_mmxxx_add_epi(T::_mmxxx_add_epi(_mmxxx_or_si( _mmxxx_slli_si(H_row[j], T::kLSS), _mmxxx_srli_si(y, T::kRSS)), q), c); T::_mmxxx_prefix_max(Q_row[j], pimpl_->masks, &pimpl_->penalties[T::kLogNumVar]); H_row[j] = T::_mmxxx_max_epi(H_row[j], T::_mmxxx_max_epi(E_row[j], Q_row[j])); x = T::_mmxxx_max_epi(H_row[j], T::_mmxxx_sub_epi(E_row[j], g)); y = T::_mmxxx_max_epi(H_row[j], T::_mmxxx_sub_epi(Q_row[j], q)); if (type_ == AlignmentType::kSW) { H_row[j] = T::_mmxxx_max_epi(H_row[j], zeroes); } score = T::_mmxxx_max_epi(score, H_row[j]); } if (type_ == AlignmentType::kSW) { std::int32_t max_row_score = _mmxxx_max_value(score); if (max_score < max_row_score) { max_score = max_row_score; max_i = i; } } else if (type_ == AlignmentType::kOV) { if (node->out_edges().empty()) { std::int32_t max_row_score = _mmxxx_max_value(score); if (max_score < max_row_score) { max_score = max_row_score; max_i = i; } } } else if (type_ == AlignmentType::kNW) { if (node->out_edges().empty()) { std::int32_t max_row_score = _mmxxx_value_at( H_row[matrix_width - 1], last_column_id); if (max_score < max_row_score) { max_score = max_row_score; max_i = i; } } } } if (max_i == -1 && max_j == -1) { // no alignment found return Alignment(); } if (type_ == AlignmentType::kSW) { max_j = _mmxxx_index_of(&(pimpl_->H[max_i * matrix_width]), matrix_width, max_score); } else if (type_ == AlignmentType::kOV) { if (graph->nodes()[rank_to_node_id[max_i - 1]]->out_edges().empty()) { max_j = _mmxxx_index_of(&(pimpl_->H[max_i * matrix_width]), matrix_width, max_score); } else { max_j = normal_matrix_width - 1; } } else if (type_ == AlignmentType::kNW) { max_j = normal_matrix_width - 1; } // backtrack std::uint32_t 
max_num_predecessors = 0; for (std::uint32_t i = 0; i < (std::uint32_t) max_i; ++i) { max_num_predecessors = std::max(max_num_predecessors, (std::uint32_t) graph->nodes()[rank_to_node_id[i]]->in_edges().size()); } typename T::type* backtrack_storage = nullptr; typename T::type* H = allocateAlignedMemory(&backtrack_storage, 9 * T::kNumVar + 4 * T::kNumVar * max_num_predecessors, kRegisterSize / 8); typename T::type* H_pred = H + T::kNumVar; typename T::type* H_diag_pred = H_pred + T::kNumVar * max_num_predecessors; typename T::type* H_left = H_diag_pred + T::kNumVar * max_num_predecessors; typename T::type* F = H_left + T::kNumVar; typename T::type* F_pred = F + T::kNumVar; typename T::type* O = F_pred + T::kNumVar * max_num_predecessors; typename T::type* O_pred = O + T::kNumVar; typename T::type* E = O_pred + T::kNumVar * max_num_predecessors; typename T::type* E_left = E + T::kNumVar; typename T::type* Q = E_left + T::kNumVar; typename T::type* Q_left = Q + T::kNumVar; typename T::type* profile = Q_left + T::kNumVar; std::vector predecessors; std::int32_t i = max_i; std::int32_t j = max_j; std::int32_t prev_i = 0, prev_j = 0; std::uint32_t j_div = j / T::kNumVar; std::uint32_t j_mod = j % T::kNumVar; bool load_next_segment = true; Alignment alignment; do { // check stop condition if (j == -1 || i == 0) { break; } const auto& node = graph->nodes()[rank_to_node_id[i - 1]]; // load everything if (load_next_segment) { predecessors.clear(); // load current cells _mmxxx_store_si(reinterpret_cast<__mxxxi*>(H), pimpl_->H[i * matrix_width + j_div]); _mmxxx_store_si(reinterpret_cast<__mxxxi*>(E), pimpl_->E[i * matrix_width + j_div]); _mmxxx_store_si(reinterpret_cast<__mxxxi*>(Q), pimpl_->Q[i * matrix_width + j_div]); // load predecessors cells if (node->in_edges().empty()) { predecessors.emplace_back(0); _mmxxx_store_si(reinterpret_cast<__mxxxi*>(H_pred), pimpl_->H[j_div]); _mmxxx_store_si(reinterpret_cast<__mxxxi*>(F_pred), pimpl_->F[j_div]); 
_mmxxx_store_si(reinterpret_cast<__mxxxi*>(O_pred), pimpl_->O[j_div]); } else { std::uint32_t store_pos = 0; for (const auto& edge: node->in_edges()) { predecessors.emplace_back( pimpl_->node_id_to_rank[edge->begin_node_id()] + 1); _mmxxx_store_si( reinterpret_cast<__mxxxi*>(&H_pred[store_pos * T::kNumVar]), pimpl_->H[predecessors.back() * matrix_width + j_div]); _mmxxx_store_si( reinterpret_cast<__mxxxi*>(&F_pred[store_pos * T::kNumVar]), pimpl_->F[predecessors.back() * matrix_width + j_div]); _mmxxx_store_si( reinterpret_cast<__mxxxi*>(&O_pred[store_pos * T::kNumVar]), pimpl_->O[predecessors.back() * matrix_width + j_div]); ++store_pos; } } // load query profile cells _mmxxx_store_si(reinterpret_cast<__mxxxi*>(profile), pimpl_->sequence_profile[node->code() * matrix_width + j_div]); } // check stop condition if (type_ == AlignmentType::kSW && H[j_mod] == 0) { break; } if (j_mod == 0) { // border case if (j_div > 0) { for (std::uint32_t p = 0; p < predecessors.size(); ++p) { _mmxxx_store_si( reinterpret_cast<__mxxxi*>(&H_diag_pred[p * T::kNumVar]), pimpl_->H[predecessors[p] * matrix_width + (j_div - 1)]); } _mmxxx_store_si(reinterpret_cast<__mxxxi*>(H_left), pimpl_->H[i * matrix_width + j_div - 1]); _mmxxx_store_si(reinterpret_cast<__mxxxi*>(E_left), pimpl_->E[i * matrix_width + j_div - 1]); _mmxxx_store_si(reinterpret_cast<__mxxxi*>(Q_left), pimpl_->Q[i * matrix_width + j_div - 1]); } else { for (std::uint32_t p = 0; p < predecessors.size(); ++p) { H_diag_pred[(p + 1) * T::kNumVar - 1] = pimpl_->first_column[predecessors[p]]; } H_left[T::kNumVar - 1] = pimpl_->first_column[i]; E_left[T::kNumVar - 1] = pimpl_->first_column[i]; Q_left[T::kNumVar - 1] = pimpl_->first_column[i]; } } // find best predecessor cell bool predecessor_found = false, extend_left = false, extend_up = false; if (i != 0) { for (std::uint32_t p = 0; p < predecessors.size(); ++p) { if ((j_mod == 0 && H[j_mod] == H_diag_pred[(p + 1) * T::kNumVar - 1] + profile[j_mod]) || (j_mod != 0 && H[j_mod] 
== H_pred[p * T::kNumVar + j_mod - 1] + profile[j_mod])) { prev_i = predecessors[p]; prev_j = j - 1; predecessor_found = true; break; } } } if (!predecessor_found && i != 0) { for (std::uint32_t p = 0; p < predecessors.size(); ++p) { if ((extend_up = H[j_mod] == F_pred[p * T::kNumVar + j_mod] + e_) || H[j_mod] == H_pred[p * T::kNumVar + j_mod] + g_ || (extend_up = H[j_mod] == O_pred[p * T::kNumVar + j_mod] + c_) || H[j_mod] == H_pred[p * T::kNumVar + j_mod] + q_) { prev_i = predecessors[p]; prev_j = j; predecessor_found = true; break; } } } if (!predecessor_found) { if ((j_mod != 0 && ((extend_left = H[j_mod] == E[j_mod - 1] + e_) || H[j_mod] == H[j_mod - 1] + g_ || (extend_left = H[j_mod] == Q[j_mod - 1] + c_) || H[j_mod] == H[j_mod - 1] + q_)) || (j_mod == 0 && ((extend_left = H[j_mod] == E_left[T::kNumVar - 1] + e_) || H[j_mod] == H_left[T::kNumVar - 1] + g_ || (extend_left = H[j_mod] == Q_left[T::kNumVar - 1] + c_) || H[j_mod] == H_left[T::kNumVar - 1] + q_))) { prev_i = i; prev_j = j - 1; predecessor_found = true; } } alignment.emplace_back(i == prev_i ? -1 : rank_to_node_id[i - 1], j == prev_j ? -1 : j); // update for next round load_next_segment = (i == prev_i ? false : true) || (j != prev_j && prev_j % T::kNumVar == T::kNumVar - 1 ? 
true : false); i = prev_i; j = prev_j; j_div = j / T::kNumVar; j_mod = j % T::kNumVar; if (extend_left) { while (true) { // load if (j_mod == T::kNumVar - 1) { _mmxxx_store_si(reinterpret_cast<__mxxxi*>(E), pimpl_->E[i * matrix_width + j_div]); _mmxxx_store_si(reinterpret_cast<__mxxxi*>(Q), pimpl_->Q[i * matrix_width + j_div]); } else if (j_mod == 0) { // boarder case if (j_div > 0) { _mmxxx_store_si(reinterpret_cast<__mxxxi*>(E_left), pimpl_->E[i * matrix_width + j_div - 1]); _mmxxx_store_si(reinterpret_cast<__mxxxi*>(Q_left), pimpl_->Q[i * matrix_width + j_div - 1]); } } alignment.emplace_back(-1, j); --j; j_div = j / T::kNumVar; j_mod = j % T::kNumVar; if (j == -1 || (j_mod != T::kNumVar - 1 && E[j_mod] + e_ != E[j_mod + 1]) || (j_mod == T::kNumVar - 1 && E_left[j_mod] + e_ != E[0]) || (j_mod != T::kNumVar - 1 && Q[j_mod] + c_ != Q[j_mod + 1]) || (j_mod == T::kNumVar - 1 && Q_left[j_mod] + c_ != Q[0])) { break; } } load_next_segment = true; } else if (extend_up) { while (true) { // load _mmxxx_store_si(reinterpret_cast<__mxxxi*>(F), pimpl_->F[i * matrix_width + j_div]); _mmxxx_store_si(reinterpret_cast<__mxxxi*>(O), pimpl_->O[i * matrix_width + j_div]); predecessors.clear(); std::uint32_t store_pos = 0; for (const auto& it: graph->nodes()[rank_to_node_id[i - 1]]->in_edges()) { predecessors.emplace_back( pimpl_->node_id_to_rank[it->begin_node_id()] + 1); _mmxxx_store_si( reinterpret_cast<__mxxxi*>(&H_pred[store_pos * T::kNumVar]), pimpl_->H[predecessors.back() * matrix_width + j_div]); _mmxxx_store_si( reinterpret_cast<__mxxxi*>(&F_pred[store_pos * T::kNumVar]), pimpl_->F[predecessors.back() * matrix_width + j_div]); _mmxxx_store_si( reinterpret_cast<__mxxxi*>(&O_pred[store_pos * T::kNumVar]), pimpl_->O[predecessors.back() * matrix_width + j_div]); ++store_pos; } bool stop = true; prev_i = 0; for (std::uint32_t p = 0; p < predecessors.size(); ++p) { if (F[j_mod] == F_pred[p * T::kNumVar + j_mod] + e_ || O[j_mod] == O_pred[p * T::kNumVar + j_mod] + c_) { prev_i = 
predecessors[p]; stop = false; break; } } if (stop == true) { for (std::uint32_t p = 0; p < predecessors.size(); ++p) { if (F[j_mod] == H_pred[p * T::kNumVar + j_mod] + g_ || O[j_mod] == H_pred[p * T::kNumVar + j_mod] + q_) { prev_i = predecessors[p]; break; } } } alignment.emplace_back(rank_to_node_id[i - 1], -1); i = prev_i; if (stop || i == 0) { break; } } } } while (true); delete[] backtrack_storage; // update alignment for NW (backtrack stops on first row or column) if (type_ == AlignmentType::kNW) { while (i == 0 && j != -1) { alignment.emplace_back(-1, j); --j; } while (i != 0 && j == -1) { alignment.emplace_back(rank_to_node_id[i - 1], -1); const auto& node = graph->nodes()[rank_to_node_id[i - 1]]; if (node->in_edges().empty()) { i = 0; } else { for (const auto& edge: node->in_edges()) { std::uint32_t pred_i = pimpl_->node_id_to_rank[edge->begin_node_id()] + 1; if (pimpl_->first_column[matrix_height + i] == pimpl_->first_column[matrix_height + pred_i] + e_ || pimpl_->first_column[2 * matrix_height + i] == pimpl_->first_column[2 * matrix_height + pred_i] + c_ ) { i = pred_i; break; } } } } } std::reverse(alignment.begin(), alignment.end()); return alignment; #else return Alignment(); #endif } } spoa-3.0.1/src/simd_alignment_engine.hpp000066400000000000000000000042761351220300700202760ustar00rootroot00000000000000/*! 
* @file simd_alignment_engine.hpp * * @brief SimdAlignmentEngine class header file */ #pragma once #include #include #include #include #include "spoa/alignment_engine.hpp" namespace spoa { class Graph; class SimdAlignmentEngine; std::unique_ptr createSimdAlignmentEngine(AlignmentType type, AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q, std::int8_t c); class SimdAlignmentEngine: public AlignmentEngine { public: ~SimdAlignmentEngine(); void prealloc(std::uint32_t max_sequence_size, std::uint32_t alphabet_size) override; Alignment align(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept override; friend std::unique_ptr createSimdAlignmentEngine( AlignmentType type, AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q, std::int8_t c); private: SimdAlignmentEngine(AlignmentType type, AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q, std::int8_t c); SimdAlignmentEngine(const SimdAlignmentEngine&) = delete; const SimdAlignmentEngine& operator=(const SimdAlignmentEngine&) = delete; template Alignment linear(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept; template Alignment affine(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept; template Alignment convex(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept; void realloc(std::uint32_t matrix_width, std::uint32_t matrix_height, std::uint32_t num_codes); template void initialize(const char* sequence, const std::unique_ptr& graph, std::uint32_t normal_matrix_width, std::uint32_t matrix_width, std::uint32_t matrix_height) noexcept; struct Implementation; std::unique_ptr pimpl_; }; } spoa-3.0.1/src/sisd_alignment_engine.cpp000066400000000000000000001027671351220300700203030ustar00rootroot00000000000000/*! 
* @file sisd_alignment_engine.cpp * * @brief SisdAlignmentEngine class source file */ #include #include #include "spoa/graph.hpp" #include "sisd_alignment_engine.hpp" namespace spoa { constexpr std::int32_t kNegativeInfinity = std::numeric_limits::min() + 1024; std::unique_ptr createSisdAlignmentEngine(AlignmentType type, AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q, std::int8_t c) { return std::unique_ptr(new SisdAlignmentEngine(type, subtype, m, n, g, e, q, c)); } struct SisdAlignmentEngine::Implementation { std::vector node_id_to_rank; std::vector sequence_profile; std::vector M; std::int32_t* H; std::int32_t* F; std::int32_t* E; std::int32_t* O; std::int32_t* Q; Implementation() : node_id_to_rank(), sequence_profile(), M(), H(nullptr), F(nullptr), E(nullptr), O(nullptr), Q(nullptr) { } }; SisdAlignmentEngine::SisdAlignmentEngine(AlignmentType type, AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q, std::int8_t c) : AlignmentEngine(type, subtype, m, n, g, e, q, c), pimpl_(new Implementation()) { } SisdAlignmentEngine::~SisdAlignmentEngine() { } void SisdAlignmentEngine::prealloc(std::uint32_t max_sequence_size, std::uint32_t alphabet_size) { realloc(max_sequence_size, alphabet_size * max_sequence_size, alphabet_size); } void SisdAlignmentEngine::realloc(std::uint32_t matrix_width, std::uint32_t matrix_height, std::uint32_t num_codes) { if (pimpl_->node_id_to_rank.size() < matrix_height - 1) { pimpl_->node_id_to_rank.resize(matrix_height - 1, 0); } if (pimpl_->sequence_profile.size() < num_codes * matrix_width) { pimpl_->sequence_profile.resize(num_codes * matrix_width, 0); } if (subtype_ == AlignmentSubtype::kLinear) { if (pimpl_->M.size() < matrix_height * matrix_width) { pimpl_->M.resize(matrix_width * matrix_height, 0); pimpl_->H = pimpl_->M.data(); pimpl_->F = nullptr; pimpl_->E = nullptr; } } else if (subtype_ == AlignmentSubtype::kAffine) { if 
(pimpl_->M.size() < 3 * matrix_height * matrix_width) { pimpl_->M.resize(3 * matrix_width * matrix_height, 0); pimpl_->H = pimpl_->M.data(); pimpl_->F = pimpl_->H + matrix_width * matrix_height; pimpl_->E = pimpl_->F + matrix_width * matrix_height; } } else if (subtype_ == AlignmentSubtype::kConvex) { if (pimpl_->M.size() < 5 * matrix_height * matrix_width) { pimpl_->M.resize(5 * matrix_width * matrix_height, 0); pimpl_->H = pimpl_->M.data(); pimpl_->F = pimpl_->H + matrix_width * matrix_height; pimpl_->E = pimpl_->F + matrix_width * matrix_height; pimpl_->O = pimpl_->E + matrix_width * matrix_height; pimpl_->Q = pimpl_->O + matrix_width * matrix_height; } } } void SisdAlignmentEngine::initialize(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept { std::uint32_t matrix_width = sequence_size + 1; std::uint32_t matrix_height = graph->nodes().size() + 1; for (std::uint32_t i = 0; i < graph->num_codes(); ++i) { char c = graph->decoder(i); pimpl_->sequence_profile[i * matrix_width] = 0; for (std::uint32_t j = 0; j < sequence_size; ++j) { pimpl_->sequence_profile[i * matrix_width + (j + 1)] = (c == sequence[j] ? m_ : n_); } } const auto& rank_to_node_id = graph->rank_to_node_id(); for (std::uint32_t i = 0; i < rank_to_node_id.size(); ++i) { pimpl_->node_id_to_rank[rank_to_node_id[i]] = i; } // initialize secondary matrices switch (subtype_) { case AlignmentSubtype::kConvex: pimpl_->O[0] = 0; pimpl_->Q[0] = 0; for (std::uint32_t j = 1; j < matrix_width; ++j) { pimpl_->O[j] = kNegativeInfinity; pimpl_->Q[j] = q_ + (j - 1) * c_; } for (std::uint32_t i = 1; i < matrix_height; ++i) { const auto& edges = graph->nodes()[rank_to_node_id[i - 1]]->in_edges(); std::int32_t penalty = edges.empty() ? 
q_ - c_ : kNegativeInfinity; for (const auto& edge: edges) { std::uint32_t pred_i = pimpl_->node_id_to_rank[ edge->begin_node_id()] + 1; penalty = std::max(penalty, pimpl_->O[pred_i * matrix_width]); } pimpl_->O[i * matrix_width] = penalty + c_; pimpl_->Q[i * matrix_width] = kNegativeInfinity; } case AlignmentSubtype::kAffine: pimpl_->F[0] = 0; pimpl_->E[0] = 0; for (std::uint32_t j = 1; j < matrix_width; ++j) { pimpl_->F[j] = kNegativeInfinity; pimpl_->E[j] = g_ + (j - 1) * e_; } for (std::uint32_t i = 1; i < matrix_height; ++i) { const auto& edges = graph->nodes()[rank_to_node_id[i - 1]]->in_edges(); std::int32_t penalty = edges.empty() ? g_ - e_ : kNegativeInfinity; for (const auto& edge: edges) { std::uint32_t pred_i = pimpl_->node_id_to_rank[ edge->begin_node_id()] + 1; penalty = std::max(penalty, pimpl_->F[pred_i * matrix_width]); } pimpl_->F[i * matrix_width] = penalty + e_; pimpl_->E[i * matrix_width] = kNegativeInfinity; } case AlignmentSubtype::kLinear: pimpl_->H[0] = 0; default: break; } // initialize primary matrix switch (type_) { case AlignmentType::kSW: for (std::uint32_t j = 1; j < matrix_width; ++j) { pimpl_->H[j] = 0; } for (std::uint32_t i = 1; i < matrix_height; ++i) { pimpl_->H[i * matrix_width] = 0; } break; case AlignmentType::kNW: switch (subtype_) { case AlignmentSubtype::kConvex: for (std::uint32_t j = 1; j < matrix_width; ++j) { pimpl_->H[j] = std::max(pimpl_->Q[j], pimpl_->E[j]); } for (std::uint32_t i = 1; i < matrix_height; ++i) { pimpl_->H[i * matrix_width] = std::max( pimpl_->O[i * matrix_width], pimpl_->F[i * matrix_width]); } break; case AlignmentSubtype::kAffine: for (std::uint32_t j = 1; j < matrix_width; ++j) { pimpl_->H[j] = pimpl_->E[j]; } for (std::uint32_t i = 1; i < matrix_height; ++i) { pimpl_->H[i * matrix_width] = pimpl_->F[i * matrix_width]; } break; case AlignmentSubtype::kLinear: for (std::uint32_t j = 1; j < matrix_width; ++j) { pimpl_->H[j] = j * g_; } for (std::uint32_t i = 1; i < matrix_height; ++i) { const auto& 
edges = graph->nodes()[rank_to_node_id[i - 1]]->in_edges(); std::int32_t penalty = edges.empty() ? 0 : kNegativeInfinity; for (const auto& edge: edges) { std::uint32_t pred_i = pimpl_->node_id_to_rank[ edge->begin_node_id()] + 1; penalty = std::max(penalty, pimpl_->H[pred_i * matrix_width]); } pimpl_->H[i * matrix_width] = penalty + g_; } default: break; } break; case AlignmentType::kOV: switch (subtype_) { case AlignmentSubtype::kConvex: for (std::uint32_t j = 1; j < matrix_width; ++j) { pimpl_->H[j] = std::max(pimpl_->Q[j], pimpl_->E[j]); } break; case AlignmentSubtype::kAffine: for (std::uint32_t j = 1; j < matrix_width; ++j) { pimpl_->H[j] = pimpl_->E[j]; } break; case AlignmentSubtype::kLinear: for (std::uint32_t j = 1; j < matrix_width; ++j) { pimpl_->H[j] = j * g_; } break; default: break; } for (std::uint32_t i = 1; i < matrix_height; ++i) { pimpl_->H[i * matrix_width] = 0; } break; default: break; } } Alignment SisdAlignmentEngine::align(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept { if (graph->nodes().empty() || sequence_size == 0) { return Alignment(); } if (subtype_ == AlignmentSubtype::kLinear) { return linear(sequence, sequence_size, graph); } else if (subtype_ == AlignmentSubtype::kAffine) { return affine(sequence, sequence_size, graph); } else if (subtype_ == AlignmentSubtype::kConvex) { return convex(sequence, sequence_size, graph); } return Alignment(); } Alignment SisdAlignmentEngine::linear(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept { std::uint32_t matrix_width = sequence_size + 1; std::uint32_t matrix_height = graph->nodes().size() + 1; const auto& rank_to_node_id = graph->rank_to_node_id(); // realloc realloc(matrix_width, matrix_height, graph->num_codes()); // initialize initialize(sequence, sequence_size, graph); std::int32_t max_score = type_ == AlignmentType::kSW ? 
0 : kNegativeInfinity; std::int32_t max_i = -1; std::int32_t max_j = -1; auto update_max_score = [&max_score, &max_i, &max_j](std::int32_t* H_row, std::uint32_t i, std::uint32_t j) -> void { if (max_score < H_row[j]) { max_score = H_row[j]; max_i = i; max_j = j; } return; }; // alignment for (std::uint32_t node_id: rank_to_node_id) { const auto& node = graph->nodes()[node_id]; const auto& char_profile = &(pimpl_->sequence_profile[node->code() * matrix_width]); std::uint32_t i = pimpl_->node_id_to_rank[node_id] + 1; std::int32_t* H_row = &(pimpl_->H[i * matrix_width]); std::uint32_t pred_i = node->in_edges().empty() ? 0 : pimpl_->node_id_to_rank[node->in_edges()[0]->begin_node_id()] + 1; std::int32_t* H_pred_row = &(pimpl_->H[pred_i * matrix_width]); for (std::uint32_t j = 1; j < matrix_width; ++j) { // update H H_row[j] = std::max(H_pred_row[j - 1] + char_profile[j], H_pred_row[j] + g_); } // check other predeccessors for (std::uint32_t p = 1; p < node->in_edges().size(); ++p) { pred_i = pimpl_->node_id_to_rank[node->in_edges()[p]->begin_node_id()] + 1; H_pred_row = &(pimpl_->H[pred_i * matrix_width]); for (std::uint32_t j = 1; j < matrix_width; ++j) { // update H H_row[j] = std::max(H_pred_row[j - 1] + char_profile[j], std::max(H_row[j], H_pred_row[j] + g_)); } } for (std::uint32_t j = 1; j < matrix_width; ++j) { // update H H_row[j] = std::max(H_row[j - 1] + g_, H_row[j]); if (type_ == AlignmentType::kSW) { H_row[j] = std::max(H_row[j], 0); update_max_score(H_row, i, j); } else if (type_ == AlignmentType::kNW && (j == matrix_width - 1 && node->out_edges().empty())) { update_max_score(H_row, i, j); } else if (type_ == AlignmentType::kOV && (node->out_edges().empty())) { update_max_score(H_row, i, j); } } } // backtrack Alignment alignment; std::uint32_t i = max_i; std::uint32_t j = max_j; auto sw_condition = [this, &i, &j, &matrix_width]() { return (pimpl_->H[i * matrix_width + j] == 0) ? 
false : true; }; auto nw_condition = [&i, &j]() { return (i == 0 && j == 0) ? false : true; }; auto ov_condition = [&i, &j]() { return (i == 0 || j == 0) ? false : true; }; std::uint32_t prev_i = 0; std::uint32_t prev_j = 0; while ((type_ == AlignmentType::kSW && sw_condition()) || (type_ == AlignmentType::kNW && nw_condition()) || (type_ == AlignmentType::kOV && ov_condition())) { auto H_ij = pimpl_->H[i * matrix_width + j]; bool predecessor_found = false; if (i != 0 && j != 0) { const auto& node = graph->nodes()[rank_to_node_id[i - 1]]; std::int32_t match_cost = pimpl_->sequence_profile[node->code() * matrix_width + j]; std::uint32_t pred_i = node->in_edges().empty() ? 0 : pimpl_->node_id_to_rank[node->in_edges()[0]->begin_node_id()] + 1; if (H_ij == pimpl_->H[pred_i * matrix_width + (j - 1)] + match_cost) { prev_i = pred_i; prev_j = j - 1; predecessor_found = true; } else { const auto& edges = node->in_edges(); for (std::uint32_t p = 1; p < edges.size(); ++p) { std::uint32_t pred_i = pimpl_->node_id_to_rank[edges[p]->begin_node_id()] + 1; if (H_ij == pimpl_->H[pred_i * matrix_width + (j - 1)] + match_cost) { prev_i = pred_i; prev_j = j - 1; predecessor_found = true; break; } } } } if (!predecessor_found && i != 0) { const auto& node = graph->nodes()[rank_to_node_id[i - 1]]; std::uint32_t pred_i = node->in_edges().empty() ? 
0 : pimpl_->node_id_to_rank[node->in_edges()[0]->begin_node_id()] + 1; if (H_ij == pimpl_->H[pred_i * matrix_width + j] + g_) { prev_i = pred_i; prev_j = j; predecessor_found = true; } else { const auto& edges = node->in_edges(); for (std::uint32_t p = 1; p < edges.size(); ++p) { std::uint32_t pred_i = pimpl_->node_id_to_rank[edges[p]->begin_node_id()] + 1; if (H_ij == pimpl_->H[pred_i * matrix_width + j] + g_) { prev_i = pred_i; prev_j = j; predecessor_found = true; break; } } } } if (!predecessor_found && H_ij == pimpl_->H[i * matrix_width + j - 1] + g_) { prev_i = i; prev_j = j - 1; predecessor_found = true; } alignment.emplace_back(i == prev_i ? -1 : rank_to_node_id[i - 1], j == prev_j ? -1 : j - 1); i = prev_i; j = prev_j; } std::reverse(alignment.begin(), alignment.end()); return alignment; } Alignment SisdAlignmentEngine::affine(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept { std::uint32_t matrix_width = sequence_size + 1; std::uint32_t matrix_height = graph->nodes().size() + 1; const auto& rank_to_node_id = graph->rank_to_node_id(); // realloc realloc(matrix_width, matrix_height, graph->num_codes()); // initialize initialize(sequence, sequence_size, graph); std::int32_t max_score = type_ == AlignmentType::kSW ? 0 : kNegativeInfinity; std::int32_t max_i = -1; std::int32_t max_j = -1; auto update_max_score = [&max_score, &max_i, &max_j](std::int32_t* H_row, std::uint32_t i, std::uint32_t j) -> void { if (max_score < H_row[j]) { max_score = H_row[j]; max_i = i; max_j = j; } return; }; // alignment for (std::uint32_t node_id: rank_to_node_id) { const auto& node = graph->nodes()[node_id]; const auto& char_profile = &(pimpl_->sequence_profile[node->code() * matrix_width]); std::uint32_t i = pimpl_->node_id_to_rank[node_id] + 1; std::int32_t* H_row = &(pimpl_->H[i * matrix_width]); std::int32_t* F_row = &(pimpl_->F[i * matrix_width]); std::uint32_t pred_i = node->in_edges().empty() ? 
0 : pimpl_->node_id_to_rank[node->in_edges()[0]->begin_node_id()] + 1; std::int32_t* H_pred_row = &(pimpl_->H[pred_i * matrix_width]); std::int32_t* F_pred_row = &(pimpl_->F[pred_i * matrix_width]); for (std::uint32_t j = 1; j < matrix_width; ++j) { // update F F_row[j] = std::max(H_pred_row[j] + g_, F_pred_row[j] + e_); // update H H_row[j] = H_pred_row[j - 1] + char_profile[j]; } // check other predeccessors for (std::uint32_t p = 1; p < node->in_edges().size(); ++p) { pred_i = pimpl_->node_id_to_rank[node->in_edges()[p]->begin_node_id()] + 1; H_pred_row = &(pimpl_->H[pred_i * matrix_width]); F_pred_row = &(pimpl_->F[pred_i * matrix_width]); for (std::uint32_t j = 1; j < matrix_width; ++j) { // update F F_row[j] = std::max(F_row[j], std::max(H_pred_row[j] + g_, F_pred_row[j] + e_)); // update H H_row[j] = std::max(H_row[j], H_pred_row[j - 1] + char_profile[j]); } } std::int32_t* E_row = &(pimpl_->E[i * matrix_width]); for (std::uint32_t j = 1; j < matrix_width; ++j) { // update E E_row[j] = std::max(H_row[j - 1] + g_, E_row[j - 1] + e_); // update H H_row[j] = std::max(H_row[j], std::max(F_row[j], E_row[j])); if (type_ == AlignmentType::kSW) { H_row[j] = std::max(H_row[j], 0); update_max_score(H_row, i, j); } else if (type_ == AlignmentType::kNW && (j == matrix_width - 1 && node->out_edges().empty())) { update_max_score(H_row, i, j); } else if (type_ == AlignmentType::kOV && (node->out_edges().empty())) { update_max_score(H_row, i, j); } } } // backtrack Alignment alignment; std::uint32_t i = max_i; std::uint32_t j = max_j; auto sw_condition = [this, &i, &j, &matrix_width]() { return (pimpl_->H[i * matrix_width + j] == 0) ? false : true; }; auto nw_condition = [&i, &j]() { return (i == 0 && j == 0) ? false : true; }; auto ov_condition = [&i, &j]() { return (i == 0 || j == 0) ? 
false : true; }; std::uint32_t prev_i = 0; std::uint32_t prev_j = 0; while ((type_ == AlignmentType::kSW && sw_condition()) || (type_ == AlignmentType::kNW && nw_condition()) || (type_ == AlignmentType::kOV && ov_condition())) { auto H_ij = pimpl_->H[i * matrix_width + j]; bool predecessor_found = false, extend_left = false, extend_up = false; if (i != 0 && j != 0) { const auto& node = graph->nodes()[rank_to_node_id[i - 1]]; std::int32_t match_cost = pimpl_->sequence_profile[node->code() * matrix_width + j]; std::uint32_t pred_i = node->in_edges().empty() ? 0 : pimpl_->node_id_to_rank[node->in_edges()[0]->begin_node_id()] + 1; if (H_ij == pimpl_->H[pred_i * matrix_width + (j - 1)] + match_cost) { prev_i = pred_i; prev_j = j - 1; predecessor_found = true; } else { const auto& edges = node->in_edges(); for (std::uint32_t p = 1; p < edges.size(); ++p) { pred_i = pimpl_->node_id_to_rank[edges[p]->begin_node_id()] + 1; if (H_ij == pimpl_->H[pred_i * matrix_width + (j - 1)] + match_cost) { prev_i = pred_i; prev_j = j - 1; predecessor_found = true; break; } } } } if (!predecessor_found && i != 0) { const auto& node = graph->nodes()[rank_to_node_id[i - 1]]; std::uint32_t pred_i = node->in_edges().empty() ? 
0 : pimpl_->node_id_to_rank[node->in_edges()[0]->begin_node_id()] + 1; if ((extend_up = H_ij == pimpl_->F[pred_i * matrix_width + j] + e_) || H_ij == pimpl_->H[pred_i * matrix_width + j] + g_) { prev_i = pred_i; prev_j = j; predecessor_found = true; } else { const auto& edges = node->in_edges(); for (std::uint32_t p = 1; p < edges.size(); ++p) { pred_i = pimpl_->node_id_to_rank[edges[p]->begin_node_id()] + 1; if ((extend_up = H_ij == pimpl_->F[pred_i * matrix_width + j] + e_) || H_ij == pimpl_->H[pred_i * matrix_width + j] + g_) { prev_i = pred_i; prev_j = j; predecessor_found = true; break; } } } } if (!predecessor_found && j != 0) { if ((extend_left = H_ij == pimpl_->E[i * matrix_width + j - 1] + e_) || H_ij == pimpl_->H[i * matrix_width + j - 1] + g_) { prev_i = i; prev_j = j - 1; predecessor_found = true; } } alignment.emplace_back(i == prev_i ? -1 : rank_to_node_id[i - 1], j == prev_j ? -1 : j - 1); i = prev_i; j = prev_j; if (extend_left) { while (true) { alignment.emplace_back(-1, j - 1); --j; if (pimpl_->E[i * matrix_width + j] + e_ != pimpl_->E[i * matrix_width + j + 1]) { break; } } } else if (extend_up) { while (true) { bool stop = false; prev_i = 0; for (const auto& it: graph->nodes()[rank_to_node_id[i - 1]]->in_edges()) { std::uint32_t pred_i = pimpl_->node_id_to_rank[it->begin_node_id()] + 1; if ((stop = pimpl_->F[i * matrix_width + j] == pimpl_->H[pred_i * matrix_width + j] + g_) || pimpl_->F[i * matrix_width + j] == pimpl_->F[pred_i * matrix_width + j] + e_) { prev_i = pred_i; break; } } alignment.emplace_back(rank_to_node_id[i - 1], -1); i = prev_i; if (stop || i == 0) { break; } } } } std::reverse(alignment.begin(), alignment.end()); return alignment; } Alignment SisdAlignmentEngine::convex(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept { std::uint32_t matrix_width = sequence_size + 1; std::uint32_t matrix_height = graph->nodes().size() + 1; const auto& rank_to_node_id = graph->rank_to_node_id(); // 
realloc realloc(matrix_width, matrix_height, graph->num_codes()); // initialize initialize(sequence, sequence_size, graph); std::int32_t max_score = type_ == AlignmentType::kSW ? 0 : kNegativeInfinity; std::int32_t max_i = -1; std::int32_t max_j = -1; auto update_max_score = [&max_score, &max_i, &max_j](std::int32_t* H_row, std::uint32_t i, std::uint32_t j) -> void { if (max_score < H_row[j]) { max_score = H_row[j]; max_i = i; max_j = j; } return; }; // alignment for (std::uint32_t node_id: rank_to_node_id) { const auto& node = graph->nodes()[node_id]; const auto& char_profile = &(pimpl_->sequence_profile[node->code() * matrix_width]); std::uint32_t i = pimpl_->node_id_to_rank[node_id] + 1; std::int32_t* H_row = &(pimpl_->H[i * matrix_width]); std::int32_t* F_row = &(pimpl_->F[i * matrix_width]); std::int32_t* O_row = &(pimpl_->O[i * matrix_width]); std::uint32_t pred_i = node->in_edges().empty() ? 0 : pimpl_->node_id_to_rank[node->in_edges()[0]->begin_node_id()] + 1; std::int32_t* H_pred_row = &(pimpl_->H[pred_i * matrix_width]); std::int32_t* F_pred_row = &(pimpl_->F[pred_i * matrix_width]); std::int32_t* O_pred_row = &(pimpl_->O[pred_i * matrix_width]); for (std::uint32_t j = 1; j < matrix_width; ++j) { // update F F_row[j] = std::max(H_pred_row[j] + g_, F_pred_row[j] + e_); // update O O_row[j] = std::max(H_pred_row[j] + q_, O_pred_row[j] + c_); // update H H_row[j] = H_pred_row[j - 1] + char_profile[j]; } // check other predeccessors for (std::uint32_t p = 1; p < node->in_edges().size(); ++p) { pred_i = pimpl_->node_id_to_rank[node->in_edges()[p]->begin_node_id()] + 1; H_pred_row = &(pimpl_->H[pred_i * matrix_width]); F_pred_row = &(pimpl_->F[pred_i * matrix_width]); O_pred_row = &(pimpl_->O[pred_i * matrix_width]); for (std::uint32_t j = 1; j < matrix_width; ++j) { // update F F_row[j] = std::max(F_row[j], std::max(H_pred_row[j] + g_, F_pred_row[j] + e_)); // update O O_row[j] = std::max(O_row[j], std::max(H_pred_row[j] + q_, O_pred_row[j] + c_)); // update H 
H_row[j] = std::max(H_row[j], H_pred_row[j - 1] + char_profile[j]); } } std::int32_t* E_row = &(pimpl_->E[i * matrix_width]); std::int32_t* Q_row = &(pimpl_->Q[i * matrix_width]); for (std::uint32_t j = 1; j < matrix_width; ++j) { // update E E_row[j] = std::max(H_row[j - 1] + g_, E_row[j - 1] + e_); // update Q Q_row[j] = std::max(H_row[j - 1] + q_, Q_row[j - 1] + c_); // update H H_row[j] = std::max(H_row[j], std::max( std::max(F_row[j], E_row[j]), std::max(O_row[j], Q_row[j]))); if (type_ == AlignmentType::kSW) { H_row[j] = std::max(H_row[j], 0); update_max_score(H_row, i, j); } else if (type_ == AlignmentType::kNW && (j == matrix_width - 1 && node->out_edges().empty())) { update_max_score(H_row, i, j); } else if (type_ == AlignmentType::kOV && (node->out_edges().empty())) { update_max_score(H_row, i, j); } } } // backtrack Alignment alignment; std::uint32_t i = max_i; std::uint32_t j = max_j; auto sw_condition = [this, &i, &j, &matrix_width]() { return (pimpl_->H[i * matrix_width + j] == 0) ? false : true; }; auto nw_condition = [&i, &j]() { return (i == 0 && j == 0) ? false : true; }; auto ov_condition = [&i, &j]() { return (i == 0 || j == 0) ? false : true; }; std::uint32_t prev_i = 0; std::uint32_t prev_j = 0; while ((type_ == AlignmentType::kSW && sw_condition()) || (type_ == AlignmentType::kNW && nw_condition()) || (type_ == AlignmentType::kOV && ov_condition())) { auto H_ij = pimpl_->H[i * matrix_width + j]; bool predecessor_found = false, extend_left = false, extend_up = false; if (i != 0 && j != 0) { const auto& node = graph->nodes()[rank_to_node_id[i - 1]]; std::int32_t match_cost = pimpl_->sequence_profile[node->code() * matrix_width + j]; std::uint32_t pred_i = node->in_edges().empty() ? 
0 : pimpl_->node_id_to_rank[node->in_edges()[0]->begin_node_id()] + 1; if (H_ij == pimpl_->H[pred_i * matrix_width + (j - 1)] + match_cost) { prev_i = pred_i; prev_j = j - 1; predecessor_found = true; } else { const auto& edges = node->in_edges(); for (std::uint32_t p = 1; p < edges.size(); ++p) { pred_i = pimpl_->node_id_to_rank[edges[p]->begin_node_id()] + 1; if (H_ij == pimpl_->H[pred_i * matrix_width + (j - 1)] + match_cost) { prev_i = pred_i; prev_j = j - 1; predecessor_found = true; break; } } } } if (!predecessor_found && i != 0) { const auto& node = graph->nodes()[rank_to_node_id[i - 1]]; std::uint32_t pred_i = node->in_edges().empty() ? 0 : pimpl_->node_id_to_rank[node->in_edges()[0]->begin_node_id()] + 1; if ((extend_up |= H_ij == pimpl_->F[pred_i * matrix_width + j] + e_) || H_ij == pimpl_->H[pred_i * matrix_width + j] + g_ || (extend_up |= H_ij == pimpl_->O[pred_i * matrix_width + j] + c_) || H_ij == pimpl_->H[pred_i * matrix_width + j] + q_) { prev_i = pred_i; prev_j = j; predecessor_found = true; } else { const auto& edges = node->in_edges(); for (std::uint32_t p = 1; p < edges.size(); ++p) { pred_i = pimpl_->node_id_to_rank[edges[p]->begin_node_id()] + 1; if ((extend_up |= H_ij == pimpl_->F[pred_i * matrix_width + j] + e_) || H_ij == pimpl_->H[pred_i * matrix_width + j] + g_ || (extend_up |= H_ij == pimpl_->O[pred_i * matrix_width + j] + c_) || H_ij == pimpl_->H[pred_i * matrix_width + j] + q_) { prev_i = pred_i; prev_j = j; predecessor_found = true; break; } } } } if (!predecessor_found && j != 0) { if ((extend_left |= H_ij == pimpl_->E[i * matrix_width + j - 1] + e_) || H_ij == pimpl_->H[i * matrix_width + j - 1] + g_ || (extend_left |= H_ij == pimpl_->Q[i * matrix_width + j - 1] + c_) || H_ij == pimpl_->H[i * matrix_width + j - 1] + q_) { prev_i = i; prev_j = j - 1; predecessor_found = true; } } alignment.emplace_back(i == prev_i ? -1 : rank_to_node_id[i - 1], j == prev_j ? 
-1 : j - 1); i = prev_i; j = prev_j; if (extend_left) { while (true) { alignment.emplace_back(-1, j - 1); --j; if (pimpl_->E[i * matrix_width + j] + e_ != pimpl_->E[i * matrix_width + j + 1] && pimpl_->Q[i * matrix_width + j] + c_ != pimpl_->Q[i * matrix_width + j + 1]) { break; } } } else if (extend_up) { while (true) { bool stop = true; prev_i = 0; for (const auto& it: graph->nodes()[rank_to_node_id[i - 1]]->in_edges()) { std::uint32_t pred_i = pimpl_->node_id_to_rank[it->begin_node_id()] + 1; if (pimpl_->F[i * matrix_width + j] == pimpl_->F[pred_i * matrix_width + j] + e_ || pimpl_->O[i * matrix_width + j] == pimpl_->O[pred_i * matrix_width + j] + c_) { prev_i = pred_i; stop = false; break; } } if (stop == true) { for (const auto& it: graph->nodes()[rank_to_node_id[i - 1]]->in_edges()) { std::uint32_t pred_i = pimpl_->node_id_to_rank[it->begin_node_id()] + 1; if (pimpl_->F[i * matrix_width + j] == pimpl_->H[pred_i * matrix_width + j] + g_ || pimpl_->O[i * matrix_width + j] == pimpl_->H[pred_i * matrix_width + j] + q_) { prev_i = pred_i; break; } } } alignment.emplace_back(rank_to_node_id[i - 1], -1); i = prev_i; if (stop || i == 0) { break; } } } } std::reverse(alignment.begin(), alignment.end()); return alignment; } } spoa-3.0.1/src/sisd_alignment_engine.hpp000066400000000000000000000040231351220300700202720ustar00rootroot00000000000000/*! 
* @file sisd_alignment_engine.hpp * * @brief SisdAlignmentEngine class header file */ #pragma once #include #include #include #include #include "spoa/alignment_engine.hpp" namespace spoa { class Graph; class SisdAlignmentEngine; std::unique_ptr createSisdAlignmentEngine(AlignmentType type, AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q, std::int8_t c); class SisdAlignmentEngine: public AlignmentEngine { public: ~SisdAlignmentEngine(); void prealloc(std::uint32_t max_sequence_size, std::uint32_t alphabet_size) override; Alignment align(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept override; friend std::unique_ptr createSisdAlignmentEngine( AlignmentType type, AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t c, std::int8_t q); private: SisdAlignmentEngine(AlignmentType type, AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q, std::int8_t c); SisdAlignmentEngine(const SisdAlignmentEngine&) = delete; const SisdAlignmentEngine& operator=(const SisdAlignmentEngine&) = delete; Alignment linear(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept; Alignment affine(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept; Alignment convex(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept; void realloc(std::uint32_t matrix_width, std::uint32_t matrix_height, std::uint32_t num_codes); void initialize(const char* sequence, std::uint32_t sequence_size, const std::unique_ptr& graph) noexcept; struct Implementation; std::unique_ptr pimpl_; }; } 
spoa-3.0.1/test/000077500000000000000000000000001351220300700134255ustar00rootroot00000000000000spoa-3.0.1/test/data/000077500000000000000000000000001351220300700143365ustar00rootroot00000000000000spoa-3.0.1/test/data/sample.fastq000066400000000000000000001447361351220300700166760ustar00rootroot00000000000000@0 GTCTAATGCGCTTTGTTGGCGGTGGCTTGATGCAGGGGCTAATCGACCTCTGGCAAACGGTTTTTCCATGACAGGAGTTGAATATGGCATTCCGTAATCCCTCGTCGATGATCCAAGCGGGAGCGGTCTAAGTATTGCGCATATGCGCAGGGATTCAGTCTTGCGCCGCAAACAATGCGGTCTTGTACGCTCCGGCAGGCTGGGACATTGTGTCAGCCGCAGTCCACAGGCGTCCTGCCAGCAGTGGCTGGAGGAAGCCCACTGGCGTACTCTGACACCGACGAATTTACCCAGGTTTGCAGGAGGGCACAATGAGCCAGCATTTACCTCGGCTTCTGCCGCACAGCCCGGCATCTCATCGTAAGCAGAAAAACTGTCAGAATTACCCTCCCCCTGGCTCGCCATGTGCGCATTACGTTGAGTTAACCGCGAGGAGAGGTTGATTCACCCACTTGAGCCCGCGCGGTGGTTGCCTGTACACAGCCCACGGCAGGATACAC + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
@1 ATGATGCGCTTTGTGTTGGCGCGGTGGCTTACATCTGCAGGGGCTAATCGACCTCTGGCAACCACTTTTCCATGACAAGGATTGAATATGGCATTCAGTAATCCCTTCGATGATCGCAGGGAGCGTTAGTCGATATTGCGCCACAATGCGCAGGAGCGTAATTCAGTCTGTGGCCGCAACAAGCGTGGCGTCTTACCGGCAGGGCTGGGACATTGTGTGTCGACCGCAGCTTTACAGGCGGTTGGGTCGGAGCAGTGGCTGGAAGCCCACTGGGGGTCAACTCTGACACCGACTGAAGTTTTACCCGATTCAGGAGAGGCACAATGAGCCAGCAAGTTTACCTGTTATGTGCCCGCACAGCCACAGGCATCTGGATGGCAGAAAAACTGTCAGAATTACTAGCCGCTGCGGAGCGTGGGCGCATCTCCTCGTTGAGTTAACCGCCAGAGTTGACTTCACATTACTGGCCCGCGCGGTGGTCCGCCGACTACGCAATAGCAGATACGC + /0/./1000/1-0-0...-...--..-..-.-,,/,----.-//--../..----.,,-,,-..,.--++-,,-,,-,,--+++,/.,-..//-...-.-.------+,-.,,,,---,.--..,-+,.,-,-1//.//,+.,,,/-/,-,,,,++++0.,-,/,+-,,.+,,+*,++,,++-,,-*,++**+***+++,**),**+)*,+,+,**)*,))),)*++,*)++)**,),*)*+((+),))+**++*,+****)))+))+*)-+*,+*++*+**-***++**)*,,++-+-,,*,*++*+++*-+++,,*,-.+,,,-+++*++/++,.++,,,./-,,+-,+,+,----/,.,-.-./..-.-,+,.,,,,,.--/----,++,.--,-,++-,--.,,---,-.-,-++++,++-+-,+,,+**-+++*+*-+++,++****-*,+++*,,+**++****+*++*)****+***,++,*,+,,+-+++,,,+,,.,, @2 GATCCTCAGGTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGACCTCTGGCAAACCACTTTTCCATGACAGGAGTTAGAATATGGCATTCAGTAATCCCTTCGGCCGGATGATCGCAGGGAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGCAGGCTGGGACATTGTGTGTCAGCCGCAGTCACACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTACTCTGACACCGACGAATTTACCCAGTGCAGGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGCACAGCCCGGCATCTGGGATGGCAGAAAACTGTCAGAATTACCCTCCGCCTGGAGCTGTGGCGCATTACGTTGAGTTAACCGGAGAGGTTGATTCGCCATTACACTGGCCCGCGCGGTGGTTATGCCGGACTAGCGCAAGCAGATACGC + -+.+-+--+-.-,-,.+,,,,++,+++*,++,/+,+,,,++,+,,,,+-,-,+,++,,+,,+*,+*****,++,+0-+0/,+++-++.,-,-.+++,+,-,,,,/*-*+*,,,-/,,--------020/1,,../1.0----.-0/..1//.--./.1/00////.1..///.../0/0.2/0/.0.../0/1000/0/..3//3//1//--0-//.//.0/...0/0030/2../-1-./1..1..--,,-,.--/.-///0//./...0.--./-..0.--..,,/.--,-0,--,--...10/0/-//..040/0-/././--/.././.0.1..0.////////10./2//./-0/3000.,,.././/-../--.1..1/.---./--..//-/-.0..0---/-------../.-.,/-.,-/--.---/.-/..1./2//.--/...--00---..-..-/..-,///----0.0 @3 
ACCTTAATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCATCGACCTCGTAAACCAACACTTTTCCAGATGACAGGAGTTGAATATGGCATTCAGTAATCCCCCTTCTAGATGATCGCAGGGAGCGTTTTACATCTGCGCAATGCGCAGGGGCAATTCAGTCTGGGCCGCAACAATGCGTCTTACCGGCAGGCTGGGACATTGTGTGTCAGCCGCAGTCACATGCGTCCTGTGCCAGCAGTGGCTGGAAGCCACTGGCGTACTCTGACACCGACGAATTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTGCTTTGGTCGCCCGCACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCACTGGAGCTTGGCGCATTACGTTGAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGTCGCCGGACGCTCACGCAAGCAGATTGAGGTC + -,-..-./0//101..-,,-/-/-.-//./0-,.2-../,0-----1-./..-/0..///./////-.-...-0..0///..-...//-//././0--10.00-11....-//0101--/////..25211..//0-/0.--/./-/,-..--.-..0/1-.//--/---.,.--/...0--/-,-,,,--//./-.--.1/./...-,/,,--.-.0.--,-/0-.0./,-,/....0/0//0/..-/.-/...1...0///////.1./0././010..0.-..0/0.1.-2///.../.50001.00/002/////001..1/10/.././//00./.12//10/-.///0/.0.1./4./--/.//0/--....--.0../..///-..-/0.-.-.0/,..-.+++++.,,,-,-.,/-+,-,+,--,++-,,,++.+++*,+,*)**)*)*),**+++*-,+,+*,,+**,+)***),+ @4 ATGATGCGCTTTGTTGGCGCGTGGCTTGATGCGCAGGGCTAATCGACCTCTGGCAACCACTTTTCCATGGACAGGAGTTGAATATGGCGGCATTCAGTAATCCCTGTCGCCCATGATCCGCGCAGGGAGCGTTTACATATTTGCGCAATGCGCAGGGGCAGGAGTTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGCAGGCTGGGACATTGTGTCAGCCGCAGTCAAGGAGCGTCCTCAGCAGTGGCTGGAAGCCCACTGGCGCGTACTCTGACACCGACGAATTTACCCAGTTGCAGGAGAGGCACAATGGCTCAGCATTTACCTTTGGTCGCCGCACAGCCCGGCATCTGGATGGCAGAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGTTGAGTTAACCGGAGAGGTTGATTCGCCATTAGACCTCCACCATTCTTGGTCCCGCCTGACCGCAAGCGAATACCAC + ./0-..../00/00..///01//00//....1///0./-.0001/0.2/0/...-..31/300-----/---...,-/-.../..-,-...--.--,-,,--,,-.,/,,,//,,-,,,..--0----,/--,,,-.../.--,,0-,-.-.....-..-./-./.../1..0.-...-0-./...0.-,,-,,,.-...-.-,,+-0..-.-,/-/,-.,/-.--++,,,+.---,+,--/0//0...-----//.1.-./--..--.,-/--../--..012/11/./12302/.///0//0./0/000/0///1200/01//0000//1//0100///.//./////.0//./1//./.0/0000/.-./001/0.././/0..../-.0--,--/2-.-/.--+,,,,,,,+,,+,,.,-,+.+++*,+,-*+*+**-++++,-..+,,+******.++-+.+,,-,,,+++,-++.+- @5 
ATGATGCGCTTTGTTGTGCGCGGCTTATGCCGGGAGTGGACCTCTGGCAACCACTTTTCCATGACAGGGTTGAATATGGCATTCAGTAATCCCTTCGGATGATCGCAGGGAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGCAGGCTGGGACATTACGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTACTCTGACACCGACGAATTTTACCCCAGTGCAGGAGGGCACAATGAGCCAGCATTACCTTTGGTCGCCGCACAGCCCGGCATCTGGCCTGAGAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTAACGTTGAGTTAACCGGAGAGGATGATTGGGCCATTACTGGCCCGCGCGGTGGTGCCAGCAAGCAGATACGC + +++++,,,,,--,.+.++,+++,,+++,,++---++,+,/+--..-//--,...-/-,,,-,--,-+-,-,-./../-...-----2..1//./.,/---./--...--,-./430.-../00//--..-../-/-.--,-..-,,.-/./---,--,,.------.,-0---,,-,-.--.--,++0,,,,,,,,,,...-,,+,+-,-.-,,---,-/-,.0--,+++-+++-,,.-----,---,-/.,,+,,-00/0/.,,,,,.--,/----,.+,-,/---/---,,,.--,,----,.,,,-,+.,,,----,,/..,,,,,,,,,++-,.-,.,+++-.,,+***+**,+**++**++++*+*,++**,*++****)*****++******))))),)*++*+++-+,+,+++--,+++,++,+++++-,,.,,+++,-,, @6 ATGATGCTATTTGTTGGCGCGGTGGCTTGATGCAGGGCTAATCGACCTCTGAAACCACTTTTCTCAATGACAGGGTCGAATATGGCATTCATCAATCCCTTCATATGATCGCAGGGAGCGTTTTACATATTTCTCACAGATGCGCAGGGGCAATTCAGTCTGTACGCCCAACAAATGCGTCGCAGCGGCAGGCCGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGGTACATGGCGTACTCTGACACCGACGAATTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTACCTCGCTAGACACTAAGCATATGGCCTAACCAAAACTGTCAGAATCTAGCGCCTGGCCTACGTGGCGCATTACGTTGAGTCAAGCGCCAGGTTGATTCGCCATTCTGGCCCGGCGGTGTATGCCCGACTAGCGCAAGCAATACGC + ,,0,,-,+-,+,,,+++,-,,,,,-,++,++,-,,,,,+,-,-,,+,.+/+++-+,-,,-,+*,.,//1,.,,,+,+.,---0,,,,+,--+.,,,,-,,,.,,.-/--,--.-/-/.-/-,,,,/.//.--,,,/--..././---.-0--//-/---..--,/,-,,,,-.-/.00,,++.,---./1---,,,-.----..+,+,-,,.,---+--/-.-.-,,++,,----..--.-,,,-.,,-,---,-.-,--/-/--/---,,-,-.,,,-,,+,+,.+++,+++*-*++++++*+*+**+++++*+**)))*))*+*(()))*)()))***)*))*)()((+(+)*)+)*(())))))()***)*)++*)**+,***+,++*+-++**)***,**++**,+,,+-+,.,,,,--,+,++,+++++++,,,++,+***,++++,+,,-,.,.,,-,, @7 
GATGCGCTTTGTTGTGGCGCGTGTCGCTTTACGATGCAGGGCTAATCGACCTCTGGCAACCACTTTTCTCATGACAGGAGTTGAATATGATTCTTCAGTAATCCCTTCATATCAATCCGCAGGAGCGTTTTACAGTCCGCGCAGATGCGCAGGGCAATTTCAGTCTGTGGCCGCGCCAACAATGCGTCTCTTACCGGCAGGCTGGGACATTGTGTCAGCCGCAGTCACAGGCGTCTTCAGTATCTTACCCTCGAAGCATTTTTGCGCTACGCAGCGACGAATGATGTATATCCAAGCGTGTTAATAACACCCCAGCATTTCACCTTGTATGCCGCAACACAGCCCGGCATCTGGATGGCAGAAAAGCTGTCAGCGAATCTACTCCGCCTGGAGCCCTCTTTGAATTACGCGTTTATCAGTTGGACCGTTGGTTTTATCAGCGCGGTTGATTCACTTGGTGCGCTAGTTTCTTTACCTGACCGGCGGATAAC + /.-/./----./---./...-/,.,,-----//-..2/.-/-,--.--/0/1/0...//0////.//.---...10002/1/1..-331/0-/,//..10//21/.1/./..0./0.01///020020./--0//.1.0-.-.-0.1.0/..///.//./....0/././../.--0,-.//022//---//--.-.././-,,++,.---/,,+,*,,+,,-++,,.,-+.+*,)**))*+)**)*****))**)+**),,*)(,)())+(*(+'((()(()')''&''(('())*,*)('(*'*()*((()+(+*)))))**()))((+)()((+((+*)))(()*))()()()()(*)(***)*)())((+()**))))()***)*****))**+**)**),*)+*,,**))+)**),),))*,-))****+),-.).0**++*+*-+,++*+.**-+-+,++++,,*,+))*-*+*,*,-**,*+*) @8 ATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCAATCGACCTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCTTCAGCTGATCCAGCGGAGCGTTTTACATATTGCGCAATGCGCAGGAGAATTTCAGTCTGTGGCCGCAACAATGCGTCTTACATGCACAGGCTGGGACATTGTGTCAGCCGCAGTCAGCAGGCGTCCCTGCCAGCAGTGGCTGGAAATCCCACTGGCGTACTCTGACACCGACGAATTTTACCCAGTTGCAGGAGGCACAATTGGCTCTACGATTTACCTTTGGTCGCCGCACAGCCCGGCATCTGGATGGCAGAAAAACTTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGTTGAGTCAACCGGAGCGATGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGATACGC + ,,..-./,--...-.-,--/,,-..-,-/--..-,-..--0...../..0..0-.-//0.//.-----/../0/+/,,.---/--------/-..--.--.-..-...--.,-.,,,--,.,++---,/-.,,-,-,--,-.,,++,,,.0-,,,/,+,++,.,/++,----0-,,,.,,,/,--/--.0-,+,-..-.0/-..-....00...0-0..15---..2//01/010011/.-..//1.1/1/01000--./0.-.0/./../-/.//./,..3.-..,,---/.-.--0/--/-,,,+-0.///.--.....,,,,-,,+,-.--.-,,.--,.-/-.---/,+,-02/0..-,,,,,,-++,+,,.,---,--.-,-+,--.,.-/0+++****+**+++*+,**,,-++,+-,,++++,***+-****))+)****)))*,)*+***,*+,--****+* @9 
ATGATGCGCTTTGTTGGCGCGGTGTAGTCGTGAGCAGGGGCTAATCGACCTCTGGCAACCACTTTTCCATGACAGCGAGTTGAAGTGGGATTCAGTAATCCCTTCGATGATCCGCAGGAGCGTTTACATATTGCGCAATGCTAGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTGTCTTACCGGCAGGCTCGGGACATTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGCTGGAAGCCACTGGCGTACTCTGACACCGACGAATTTTACCCAGTTGCAAGGAGCTAAGACAATCCAGCATTTACCTTCTTACTCCCGCACAGCCAGCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCTCCGCCTGGAGCGTGGCGCATTACGTTGAGTTAACTAGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGAGCAGATACA + ,,-+,,,-,,----,,,,-.-,----.---,/+.,+,,,-,+---,,-.----./,+.,./.-/-,+,,+.,-.,.+----+,.,,------,,+--.-,,-,,.,,-,-,,/+,,,-,,-//.,+-,--.,0,.,.-,,*,+-,.-,./,,.-..-,..-,,,.-,.,,,,0..-3-.--,,-,,---.--/-.-.,,-/.--.-,,,,.-.,..-..-/./././/-.../0.///./..0-/...-0...../../0/./2////./0012101/-.,.10...--.-..-0,./--./..-..-/0//1/....0-/.-----0--./...-,,,./0.-.---.----/,./02/1.0-,,,-.----.--.//.0.-----/.--0/...00/.../---.--,-,-,-,.-1...-.-/0/-.-//-...//.///.0/.-/.---/.---.0-.-.--.1/---.. @10 ACTTATCCGCTTTGTTGGCGCGGATGATGGTGCAGGGGCTTACGACCTCTGGCAACCACTTTCCATGACAGGAGTTAATATGGCATTGAGTAATCCTCGGTTCGATCAGCCGAGGCTAGTTTTACATGCGGATCCGCAGGGGCAATGCGCTTGGTGGCAACAATCGGCGTCTACCGGCAGGCTGGGACATGTGTGTCAGCCGCAGTCACAGTGCTTTCAGCAGGCTGGAAGCCCACTGAGTACTCTGACACCGACGAATTTACCCAGTTGCAGGAGGCTAATGAGCCAGGATGTTGTGGCCTTCTTACCGCACAGCCGGCATCTCATGGCAGAAAAACTGTCACGATGGCTCCGCTGCGGCTCTTAGCATTACGTTAGAGTTAACCGAGGCTTACTTCGCCATTACTGGCCGCCCCTTATGTGCGTACAAGGGCCCAAGCAAGATACGC + ,,,,,,,+,,,-+++*+,,,+++++++*),+++++**,*,++,+-+-,./,,,-,,.,,,,+,,,+,--.---,,++,---,-----,,,.--.///-0,,.+,,..+.,,-,,-+,,./.,++++,,,,,-,,+,--+-,,+,-.-+/+++++,,+.,,,-,,,,----+++,-,-.-,/---,-1-.-/--./-0-./-1....-..0-..1/-..-//..1////././-.--0.--./0--..-0.,--,.--,,,++-,--..+----,,,+**++,+++,-+++,*.,+++*.++**+-+*)*--+,++,++++**,++*+***,+,+-,+++-*+*,*++**,**,+-++*-++,+++*--++,+,.+*),+-******)++*))),****+)*,****,*+*,*,**+++*+*++)*)*******+****,+*,*++++,+ @11 
ATGATGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCCACGTAGACCTCTGGCAACCACTTTTCCATGACAGGAGTCGCAGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGGGAGCGTTTTACATATTGCCAATGCGCAGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGGCAGGCTGGGACATTGTGTCAGCCGCATCACAGGCGTGCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTGTTACTCTGACACCGACGAATTTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTTGGTCGCCGCACAGCCCGGCAATCGTATCGGCAGAAAAACTGTCAGAATTATAAGCGCCTGGGAAGCGTGGCGCATCGTTGAGTTAACCGGAGAGGTTGATTCTCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCATACGC + /02/./.-..-.-----.,+,,.-.,.,--..-..-0,/.--,0//.000//.-/-//0./..-0--.-../01////11/0/100//00/0///1/.10000.0../.0/.210//000046101/.0/10//2...//0/-...-//.1.0....20.0/././...//0/.0,-,,,-.----...0/-----001/0/...-/./-./--./---/.,-,--/././/.../..../../..0.//1.-.--.-,--.-/1.1/../.0./.0/.,-.-/..--/-.,-.,-,,.--.,++,,-/-../-/.2.--,-,,---,,,-.,-..,,,+-,+,,+,,,,,,++..10,.+,,-,-/,,++*-.++**++++*,-+,+,*++,,+,/,+,+,--+,,,,,+,+,+.*+,,+-*++,,,+-----,,--,,+,,+,+-,,++.-+,,+.,+,-/+++,,+ @12 ATGATGCGCTTTGTTGAGGGGTGGCTTGATGCAGGGTAGAGACTTACATAGCCTGGCAACCACCTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTCATGGGTCCACATCTAGCAGGGAGCGTTTTAAGTGCCCAATGCGCAGGGGCAATCGGTCTGTGGCCGCAACAAATGCGTCTACCGGCAAGGCTGGGACATTGTGTCATCGCATAAGAAGGCGTCCTTGCCAGCAGTGGCGGGAAGCCCACTGGCGTACTCTGAGACACCGACGAATTTACCCAGTTGCAGGAGGCAAGTGGCTCAGCATTTACCTTGGTCCCTAACACACAGCCCGGCATCGGATGGCAGAAAAACTGTCAGAATTACCCTCGCCTGGAGCGTGGCATTACCGTTGAGTTAACCGCGAGAGTGATGTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGATACGC + --.0-..0-------,,,.-,,./---/--./--++,,,+.-,,.+,,-,.,,0--++-+-,,,+,+,+,+*.,-+/+,+*+,,+,++----.,+.,--,..,+./,,,++++-,.-,*.,,,,,--.1../,+,,,,,++,,-0.+,-,,,,,,.,,+*-,+-,,,,+.-+,-.-.,.0--,+,,,,,--.-.1,,+,,0./..--,,+/,,/-----,0/21-/-.--./,.-..--/,-,-.-.,.,0----.-,,-.-.1/,,,,...---/+++--+-*,-++*+++++++-+++++,+**+++.-,,,++++,+*,*+*****,+,,,,,,,,,.--,,-,-,,--,,-./1./--..-.--.,.,--,..../0.//0.-/.//./..1/0...0.///0/...///./..--/,-000/1.///.----..-,/.,-,.,-,+.-,-,.-,---,+.,,+,-, @13 
ACATGATGCGCTTTGTTGTGGCGCGGTGGCTTGATGCAGGGGCTAATCGACCTCTGTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGATTCAGTAATCCCTTCGATGATCCGCGCAGGGAGCGTTTTACATATTGCGCAATGCGCGCAGGGGGGATCTTCAGTCTGTGGCCTACGCAACAATGCGTCTTACGGGCAGGCTGGGACACTATTGTGTGTCAGCCGCAGTCACAGGCGTGCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTACTCTGACACCGACGAATTTGTACCCAGTTGCAGGAGGCAATGAGCCAGCATTTACCTTTGGTCGCCGCAACGCCCGGCATCTGGATGGCAGAAAACTGGTACGAATCACCTCCGCCTGGAGCGTGGCGCATTACGTTGAGTTAACCGGAAGAGGTTGATTCGCCATTACTGTACCGCGCGGTGGTGCTCCGGACTAGCGCAAGCAGATACGC + ..0-/....-..-...---...-/-,....--0-0....//00-01111101300./..--0.///.1/.-/..--...-////////3/..2./10..//././,.,-,--.--1--0.......23/...,/.0./1.//../-.,,,/--,--++,,-,-+-.-/0.--,+,,++.++++,-1+20,-,*++****-+++-+****,,++-.,--,,++,*-,.,.-,,--..0,/,+,*+++,,,-------,,/--..-/../.1-/,-.-2-./.0..,--,-,-,,,.,..,1-0/.--,-.----,-0-,,.---..//,----,-+-,,-.----0.0--...-0--//./...---001/3./--0-//..-//..1/2//00//00./-..00//01/0./.././00//.-010/...////1./110.0.-..././/0//.20../.-,/-,..----0--.-.././.2./ @14 ATGATGCGCTTTGTTGGCGCGTGGCTTGATGCAGGGGCTAATCGACCTCTGGCAACCACTTTCCATGACAGGAGTTGAATATGGCAATTCATGTAATCCCCTTCTATTGACTCAGGCATGGAGCGTTTTACATATTGCGCAATGCGCAGGGCAATTCAGTCTGTGGCCCGCAACAATGCTTTGCCTTACCGGCAGGCTGGGACATTGTGTGTTTAGCCGCAGTCCACAGGCGTCCTTGCCAGCAGTGGCTGGAAGCCCACTGGCGTAGTACTGACACCGACGAATTCCCAGTTTGCAGGAGGCACAATCGGCTACAGCATTTACCTTTGGTCCCGCCGCACAGCCCGCGGGCATCTGGATGGCAGAAAAACTTGTCAGAATTTACCCTCCGCCTTCGGAGCGTGGCGCATTACGTTGAGTTAACCGGAGAGGTTTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGATACGC + /.--,----,--.-,--,,------,,----/,-,-.-+.....//00./.-,-/,...-/.,+,,,+.,,-0+.------.-,,,-,--,+--,+/-.-,++,+++++,-++-..,,-,.,-./--/++-----,,+,+++-,,+,++**++++,,,,.0-,.,,-++.++-.0,//+,.-++,,++++,+.--.-+,+++//..-,,+,++,+.,.+--+++**-,-+--*++++,+,/-//--,--+/-,-,-,---/-.-/,+++,--,,,--0,,----,-.--...-,.,,,,-+--++-,,,.-+*+--+-./..-..2.---.,,+.,.,+,,.,,+*+++*-++++--,+,++,,++,-021,-++,+++-+-++,+-,,--/-,-,----0//.---///..0//--./.1.././-./././//.21//.//01001/1100//3100/./0//..0..-/.-..--....-,.++,-.+ @15 
ATGATGCGCTTTGTTGGGGTGCTTATGCCGCAGGGGCTAATCGACTCTGAAGCCACCTTCCATGACAGGAGTTGAATATGGATTCCGAGTAATCCCTTCGATGATCCTAGCAGGGAGCGTTTTACATTGCGCGACTGCGCAGGGGCAATTCAGTCTTACTCCGGCTCTTGACAATGCGTCTTACCGGCAGGCTCGGGACATTGTGTGTCAGCCGCATTACAGCGGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGCGGGGGTCTATACTGACACCGACGAATTTTACCCAGTTGCAGGGAGAGGCACAATGAGCAGCATTTACCTTGTTCTGCTTAAGCACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCTCCGTGGCGCATTACGTTGAGTTAAACCCGCAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCTGAGACTAGCGCAAGCAGATACA + -/0---./---.,,-,-,-/.-,,,/,/-,----//-,-/../0/-.1..-./.,.,,,,--,/-/--...-.1--/0.-..../.-,,/./../..-.--/...--,,0./....../-..,,-/.-.,-++.,-,,/..-,,/-,-,0-,+,/+-+--+.,,.,-+-.++-,+,,,.++-,-,,+.,,-,+,+++,---,+,-,+,,,+,-,..,++-++,+,+.,+,++++,,+--,-,,,-,+,,,,,+,,.+-+++,,,++*,*-+..+-,++.-+**+..+*,+,+--,++++,+++--++,+,,,,+,,,,--.,,--+,+-,++,+-,+,-,,---,-.-./.-.,,.-..,,,,,,1.21.0-,.,,,,.-----....1./////0.....//../0/0/0/0/...1-.----/---//--.-//-.1/-00/-/-..0.-.0/..--,/,+,--,+++,/-,,,,/----.,0-,-.. @16 ATGATGCGCTTTGTTGGCGCGGGTTGGCTTGATGCAGGGCTAATCGACCTCTGGCAGACCATTTTCCATGACAGGAGTTGAATATGAATTCATAGTAATCCCTTCGGATGATCGCAGGGATGTTTTACATATTGCGGGCAATGCGCAAGCGGGGCAATTCGCCAGTCCTGCCGCAACAATGCGTCTTACGGCAGGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCGCCAGCAGTGGCTGGAAGCCCACTGGCGTACTCTGACACCGACGAATTTACCCAGTGCCAAGGAGGCACAATGAGCCAGCATTTACTTTTGGTCGCCGCACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAGAAATTACCCTCCGCCTGCGGAGCGTGGCGCGATTATACGTTGAGTTAACCGGAGGTTACTTCGCCATTACTGGCCCGCGCGCGGTGTATGCCGGACCCGCGACTAGCAGATACGC + //.-/.0.-///--,-,,-.,,,.++-+-+,,+.++-+-*+,,,,,++,+-,+*+*+*+,,*,-)))++*-++**)),)*+)*+*+,**++*,*+,+,,,++++,,)*,++**++,,,++-++)**+,/,,-,-+.-+.,--.,+,-+*+++++*+-++-,,,,-..+,,+*+++-1.-.0,,/,-*++,,,,,-++++-+,-.,-.-,/,,,--,-/,.-,.01-.-.-,--.//,/-./../.---,,,-./,.-.--.-...//0./.00.....00011.--/.-,.----/....,../././-./.0/1012..///.--/--.-/,-..0.0-./1......-./...--0.202100---,,-//-.--,---.-/-/...-,-,/--,-+-.-,,.+,-/--/-,,-+.,,-,,-.+,+,+-,,-,--,-,,-.,/,,/,./-,,-/0,,--/-++.++++++,+++,+++,-+.+, @17 
ATGAACACGCTTTGTGTTGGCGCGGTGGCTTGATGCAGGGGCGATCGACCTTAGGAGCAACCACTTTTCTAAGTGACAGGAGTTGAATATGGCATTTTCAGTAATCCCTTCGTACGAGGGCAGCGTTTACATATTGCGCAATGCGCAGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGCAGGCTCCACTTGTGTGTCAGCCGCAGTCAGGCGTTCTGCCAGCAGTGGCTGGAACTGGCCCACTGGGCGTACTCTGACACCGACGAATTTTACCCAGGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGCACAGCCCGGCATCTGGATGGCAGAAAACTGTCAGAATTACCCTCCGCCTGGAGCTGTGGGCGCATTACGTTGAGTTAACCGGAGAGGTTGATTCGCCATTACAACACGCGCGGTGGTTGCCGGACTAGCGCAAGCAGATACGC + /1..-..-/00100...--....,-,1.-///--0----,/1-..../-/,.-,,--.../220..-,,,-,,/---.-.--/----------/..-..--2-.020/././-../--././/2/0//0062211/..4//11./0./--.-//.0-.-/-/--..-.///40012../..--,,-.//--,-,--,-.-.----,,-,--/-..---.,.,++,,,,.---+/----,.+.-/.,.,.,/-,,-,--,1,-,..-.,.,,,02/./.,.,---,-.,0---,,---,,--..,-/-/-/-//../../--/.,-,,,--.--.----,-,.---./-----.///..-,,,.1,/.,.,,,,,-.,/-.,,.,/-,,,/..-,-.0.-,,-,-,,,,,-,,,.-,,,-+,,+--,-,,.,,-+-,-,-0-,-/,.-,,,,--,..-0----../-..--/. @18 AATGATGCGCCGTGTTGGCGCGGTGGCTTGATGCAGGGTAATCGACCTCTGGCAACCACTTTCATGACATAGGAGTTGAATATGCATTCTCAGTAATCCTTCGATGATCCGCAGGGAGCGTTTTACATATGCGCAATGCACTAGGGCAATTCGAGTCTGGGCATTAAAACCAATGCGTCTTAACCGGACCCAGGCTGGACAATGTGTCAATAGGCGAGTTCTAATATAAGAACGAGTTAGTGCCACATAGCAGTGGATGGGAAGGGACTAGGGTGAATCTGGTGACACCGACGAATTTGAGAGTTTGCAGGAGGAGCACAATGAGCCAGCATTTACGTCCTGGTCGCCGCGCACAGCCCGGCATCTGGATGGAAGAAAAACTGTTACGAATTACCTCGACCCCCGCTCCAAGTTAGAGATGCCGTTGAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCTCACGGCGGATCAGTCAGCAGATAC + +,.,,,.,/.-++,,*+-.++*,+-+,-,++,*+**,+*+,,,+,+-+,,++-+,+//++++**+,-,-+,-.-,.,*+,.-+.,-,,.+/,+-+,.-++,++,--,,,+,,---,,-0--0,*,+-,,-,+,++*,+-,))*,*++*,-+****++)))))+('(((*(*')(*((''(()''('()'''''*'(('()'''''&&&&)''''''''))'')*)&'('(')'')&'''''*')('')('*()))(*)+()+*(*)+*))+))****+*++*))*+,*+++,-+,,,**-+++,,++.++,*-,-+-,-,.-.,,-,.-/--/,,.-/.,/,--,--+/,,,.-.--,-,.,1.,,0,-,+++,--,//.-.,*-*.+-+/,+,-+-+.,,,..,,,------,+,-,--.-,/-,,-,-++.,,.++-+,-,-/+,,+/-./...----.,,-,,,+--/+-,,++,,,+-+,+-+-,.-.-,+/...++ @19 
ATGAATGCGCTTTGTGTTGCGCGGTGGCTTGATGCAGGGCTAATCGACCTCTGGCAACCACTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGAGTGAGCGCAGGGAGCGTTTTACATATTGCGCAATGCGCAGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTAAGCAGGCTGGGACATTGTGTGTCGACCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTACTCTGACACACGACGAATTTACCCGATGTCCAGGAGGCACAATCAGCCATTTACCGCTTTGGTCGCCGCACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGTTGAGTTAACCGGAGAGGTTGTGATTCGCCATTACTGGCCTAGGGGTGGTTGCCGGACTAGCCCCAAGCAGATACGC + /.0../000122030.2...//./.10///.1/1.//0/1.002100/202110100015///0.1.//2///2/01.1011101122010113003241113/0003000011001201214/100222312012011100//0..00211/./2--.-----/.-.-/./0.21.-.-..0.-.0--,+0---.,-+,.++*.**--,+,,,-,,.,--,---+-,,.++.++*++*++++++++-.,,,,.,+.,-,--,-+,,,.,,,,+**+*+**-+,*+,,-*----,.-,,+-,,++*+*+,,,-*+++*+,,,,,+,,.---.-.-0.-,,/---,,/----,.-,,,.2//.,-.....1.//1///3/...-./--../0-0----,,/--.,-,-/,-+.,,---,------,,/.,-,+*+,++,.+,+++++,,+-+,+**++,--,+,-+1,, @20 GTACGAGGCTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGACCTCTCGCAACCACCTTTCCATGACAGGGAGTTGAATATGGCATTCAGTAATCCTAGTTCGGATGATCCGCAGCGGAGCGTTTTACATATTGCGCATGCGCAGGGGCAGATTCAGTCTGTGGCCCGCAACAATGCGTCTTACCGGCAGTAGATGGGACATTGTGTCCTAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGTGGAAGCCCACTGGCGTACTCTGACAAGGACGAATTTACCCAGTTGCAGGAGGCACAATCAGCCAGCATTTACCTTTGGTCGCCGCACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATGACTAGCCCGCTGGAGCGTGGCGCATTACGGTCGAGTTAACCGGAGAGGTTGATTCGCTTACTGGCCCGCGCATTGGTCGTAGCCGGACTAGCGCAAGCAGAACAA + -+.-,.,.--.-,,/--.-.-1/--.,--0----.0--/../.2---/,/--/-.14/..0--0..//.///...0/-.///.0/.,,.3-./-./0/,,./--1-.0/--,.-,/,/-,--..,-,+,-/-./-,+..--0,-+,++,+++++,/++++-)-**+,**+-++,++-..--+,+***++,.++*)+****/,+++++***-*,,+---+,,+,..-/,++)+++++,+-++-++,.++,,,,,--,,,--/-0./.--0-..10.,--./..//---..0-/-.1..0.-01./-/....0//053/3.-////--/--.--/...0./--/00//.----/-,,--././0//.,/++,-,,.,,+--,-.---,---,+,,,,-,+-+/++,+..,-,,,,+**++-,-,-+-,,,-,.,.++++,+,0++-,,*+,+*-+,++++..+,,+,,-,-,-.*,,+,+ @21 
ATGTTGCTTTGTTGTGCGTGGCTTGATGCAGGGGCTAATTAGACCTCTGGCAACCACTTTCTCATGACAGGAGTGAATATGGCATTCAGTAATCCTTCGATGATCCGCAGCCATAGGGGAGCGTTTTACTATCCTAATGCGCAGCGGATTCAGTCTGTGGCCGCAACAATGCGTCTTACGGCAGGCTGGACATTGTGTCCTACAGCCGCATGTCAGGCGTCCTGCCACTGGCTGCTGGAAGCCCGGCGTACTCTGACACTAGACGAATTTTACCCAGTTGCAGGGAGAGCAACGTATGATCTAAGCGTTAGCTGTTATCCCGCAAACAGCCCGGCATCTCAGATGGCAGAAAAACTGTCATAGAATTACCCTCCGCCTGGAGCGTGGCAATTACGTTGGAGTTAACCGGAGAGGTTGATTCGGGGTCTTACTCATCCGCGCGTGGTTGCCGGACTCTAAGCAGATACGC + **())*+)*+***)++)*++.+,+.+,,,***++,*++++**,-++++-++*-+-,-,,,+),*+*+-,+++*--*++++*+)*)*****++,-,,****+*++********+*),****-+6,**+*+*+,++++,-.-+,++-**+++++,-,+*++,+-++++-,/-./,,-*,+.,,-,,,-,,++-,--,-+-+++*,.++-,-,,*-+*+,.,+,+++,*,+,*+,-,+++.++++***+--*)+*-/,+.+,***-+,-,.-,,*,**-,,,,,,,*+++,++*+-,++-,+-+*,.-..,,,,,.,,,*****,+*++-,+++++.-,,,,,*++,*+,,++,.-/,--++++*))**+**+,-,,,,+-,-,,-,+****+,**+,,+*****,****))*))**+++,,+,,*+++++*+,+++*+**+,+++,***,,*****++***++.,+++,,, @22 ATGATCCAATTTGTTGGCGCGGTGGCTTGATGCAGGGGCTATAAATCGACCTGGCGGCACCACTTTTCCATGACACAGGGGAGTTGAATATGGATTCAGTAATCCCCTTCGATGATCAAGCAGGGAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTGAAGGCAGGCTGGGACATTGTGTGTCATGAGCCGCAGTCACACCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTACTCTGACAAAGCGACGAATTTTACCCGATCAGTTGCGCAGGGGCACAATCAGCCGAGCATTTACCTTTGGTCGCCGCACAGCCCGGCATCTGGATGAGGCAGAAAACGTCAGAATTACCCTCCGCTGGCCGCTACCCTGCGCATTACATTCAGGTTAACCCGGAGGTTGATTCGCCATTACTGGCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGATGGGA + //-..-..0-////-.../0.0.41../12.0./../01./.0001110/0000/11020342421/020/10020111012/2//031/110122212211220/0/03/13//0/1000000//15141.1../001/00.1/111./..-,.,1-.-.4-0/.-.,,,++-+,,0.--/+.+,+++,++..,,.+,,++,+-.+,-+-+..++++,+,,,,-.-/,,,,,..+..-.,.0-,/,+,++,+,.,+-,+.-.,,-/-----...-----../--10-,++-,++,/,--,+,-0,,,.,-,.,---,,,-+,-1-,/+,,.,-++++++,,+---.,.,-,.-,,0-,+--,*,-,+,-,,,,-+**+,+,*))*)**,+++*++**,)+*+*)+*+++*)****,),*)+)())))**,++**.*+*,*)*+,+*++++++.,,***+,*--,++++++++,,-.--//.--/-,,. 
@23 ATGATGCGCTTGTTGGCGCATGGTGGCTTGATGGTGCAGGGGCTAATCGACCTCTGGCAACCACTTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGGATGATCCGCAAGGGAGCGTTTTACATGATGCGCAATGCGCAGGGAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGCAGGCTGAGGACATTGTGTGTCAGCCGCAGTCACGCAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTATACTCTGACACCGACGAATTACCCAGTTGCAAGGAGGCACACAATGAGCCAGGATTTTACCTTTGGTCGCCGCACAGCCATCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCGCCTGGAGCGTGGGCGCATTTACGTCAATTAACCGGAGTAGGTTGATTCGCTTACTAACTGCCGCGCGGTGTGGTTTGCCGAGACTAGCGCGACCCATGCC + /0//1/3.../1/../../.-.-.-.//./-,.-.......0..//00/3.3/10/.//.-23../..--/.///.0/2/.1.//110//.3.0.//.1//01....,/-/0/...-/...//./120//..///1/.//.0/.//0.0./.-../.40.41.././.--.0./20/22/./0...-/.01//0./0-0-2//0/0--0-0--.........--,..,0---,,-.,---20,--/,-,,+,-,+,,,/-/-/-/,,-/--1--.-.-.-.---,-,-,,./,-,,--.--..-0///0//-./...//200.././0...-.--0/./00.31110..02./00....-..,-0-///0/-/,,,.-.-,-/--1.--.-,-,.,,,+,,-../,--,/,/,-,+,-,,,+-,**,,-,+,-+++++*****-+,,+,/-.-.++.,+-.++++++,*-+++,/-,,.,,,.+,,, @24 GTCTAATGCGCTTTGTTGGCGGTGGCTTGATGCAGGGGCTAATCGACCTCTGGCAAACGGTTTTTCCATGACAGGAGTTGAATATGGCATTCCGTAATCCCTCGTCGATGATCCAAGCGGGAGCGGTCTAAGTATTGCGCATATGCGCAGGGATTCAGTCTTGCGCCGCAAACAATGCGGTCTTGTACGCTCCGGCAGGCTGGGACATTGTGTCAGCCGCAGTCCACAGGCGTCCTGCCAGCAGTGGCTGGAGGAAGCCCACTGGCGTACTCTGACACCGACGAATTTACCCAGGTTTGCAGGAGGGCACAATGAGCCAGCATTTACCTCGGCTTCTGCCGCACAGCCCGGCATCTCATCGTAAGCAGAAAAACTGTCAGAATTACCCTCCCCCTGGCTCGCCATGTGCGCATTACGTTGAGTTAACCGCGAGGAGAGGTTGATTCACCCACTTGAGCCCGCGCGGTGGTTGCCTGTACACAGCCCACGGCAGGATACAC + ,.+*++/,.,-,--,.+-++,+++-,-+-+++.,*++-,*,,,,+,--.,-,++++..---,,,+++++++.,,,+,.*+**+++*,-,+*,),*+++*+**),**+,+++****+)),),++***+)()+**)+*+*,))*)-+-*+++*-+*+,**+,)+**)*)***+++*,*+,*,-.*,-**++*,,,,+-,,--,,++,---,,-,,,--.---.-,,,-1--,/-+,,,-/-0-//-././.-,..,2..------,-/---,.-,-/----,-,-..///--,-,,,,.,-,,-.--+,,+-.,.--,.,/,...-.,--+-,-..,,-.++,,/-..--,,./..,-.--,-,.,,/-,,./21./.-,-,+,.-,+,,,.-..-.-/-,.,-,-++++++..,,---.,-++-++-*-++*+*+*+*-+/+,+++,*))*+***,****+,-,-,++,,+**,+)*))++**-)),*+*,+-*,**),,* @25 
ATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGACCTCTGGCAACCACTTTTCGAACGACAGGAGTCGAAACTGCATTCAGTAATCCCTTCAGATGATCCGCAGGGGAGCGTTTACATATTGCGCAATGCGCAGGGCAATTCAGTCTGTAAGGGTAACAATGCGTTTACCGGCAGGCTGGGACATTGTGCTACGCGGATTACAGGCGTCCTTGCGAGCGAGTGGCTGGAAGGCTCCGACTTACTCTGACAAGGGACGAATTTACCCAGTTGCAGGAGCCACATGCTACAGCATTCTACCTTTGGTCGCCGCACAGCCGGCATCTGGATGGCAGAAAAACATTTGAGAAGTGACGACTACGCGCCTGGGACTGTGCGCACATTACGTTGAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCGCGCGGTGGTCTCAGCGATTAGCCCCAAGCAGATACGC + /..-/0//001//.--..--,-.//.-.---1-...-/---/./-1.1./.--2--,..-,--+.+,-+,--...,--,.,-..-/..-.,-/.-0/0.-,-,/,-./--,-----.---11/..,./0////.-.0./.1-..-/----0/.0./.,,-,-,,-.-,-.../2,,.,,++,,.---.++**+0,,,,++****)))++*+***+*-+)+**,)**)))+***+,+*))(()**(*)(+)*)))*)+)*),+*(((*)(+*++(()())*+**)+**)))**++*+**++++-+,+,+**+*+)***)***++++,+,++++,+.,+*+,*****,,.,+,,,++)*)*)**+*+++**++,+0,,0+++-*+,,++,.+*,,,+,,,**+*,*)*)*,++++++,+-+,,,.-,--,+,.-.--..--,,---,,-0/--,//--.-..--..0.-...00/ @26 ATGGATCCATTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGACCTCTGGCAACCACTTTTCTCATGACAAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATCAGCCGCAGGGAGCGTTTCACATATTGCGCAATGCGCAGGGGGCGATTCAGTCTGTGGCTGGCAACAATGCGTCTTACCGGCAGGCTGGGACATATTGTGTGTCAGAGCCGCAGTCCACAGGCGTCCTGCAGCAGTGGCTGGAAGCCCACTGGCGTACTCTGACACAGCACGAATTTTACCCAGTTGCAGGAGGCACAATCATCAGCATTTATCTATTTTGGTCGCCGCACAGCCAGGCATCTGGATGAAGCAGAAAAACCGATTTACGATCCCCTCCGCCTGAGCGGGTTGGCGCATTACGTTGTGAGTTAACCCAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGAGACTAGCGCAGGCAGATACGC + -0-,--,--,-.,,,,,--/,,,.,,,,,-,.,,,-,.,,-/.--.-/./.-,--,-1/-./.,--..-.,-.-//.-/.--./.000//.1/.11/.00---.--/-//--.-..-..-02./-,0--.////..-/-///,//--+,-+++--.1,,.,,.,+-++,,+-++,-.0++-+-.***,,,,,-,++**,**,+-,++**-**-**,++,.+*+*+++-,0,+.++,-.+.--,,-,--,++*-,,--,/.--,-,,,-,-++,,-,,------.,*-*,-,+.+++,,,++-+.,,--,-,+,++++,-+*+++--,**+*****.+**+**++)-+,*-+****)+*)*))***,++-)))*))+*+**+++,,-,+*++++-,++-*+*.,3++,,.,,*.++-+-++++++..-..,--.,-,-.,--,./-.-.0....,..,/.--.--.---.-1/...-.-,-0-//- @27 
ATGATGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGACCTCTGGCAACCACTTTTCCATCAAAGGAGTTGATATGGCATTCAGTAATCCCTTCGGCTGATCACGCAGGGAGCGTTTTACGTTCTGTACACAATGCGCAGGGGCAATCGGTCTTGTGGCCGCAACAATGCGTCTTACCGGCAGGCTGGGACATTGTGTGGAGCCGCATTCGCAGGCGTCCTGCCAGCAGTGGCTGGAAGCCACAGGCGTACTCGACACCGACGAATTTACCCAGTTGCAGGAGGCAATGAGCCAGCATTTACCTTTGGTCGCCGCACAGCCCGGCATCTGGATGGCTAAGAAAAACTGTACGAATTTACCTCCGCCTGGAGCGTGGCGCATCATGTTGAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGATACGC + ..0..00-/1../--.--.,-/.--,/--./-/.//.-.//.0/0.//02/.-0.0/.-../----,,/.-.2..-...---./0-.0.-/-.../--/-,.,,-,,++.-,-,,.,,.1---,,-,-.--,,,.---,./---++,,--,,+,,,,,,-,,,-,,,,,,//.--2,.-,,,,,-+---.1+++*+..-,-,-+,,,,+,+,+,+.,..,,/,/,+++,,,,,,,--.,,++-+-.,+-,+,--,--,,+-,//-,-.,/..-,+-+--,-,-,,,,.+,-,,+0+**-,+--.,-,,,0,++++***+*))++)*)**,**,*,++++++*+*))*,,//,+,)),))))))))+**-*.++,+,,--,.+,,-,,-+,,,+,++--,-,,+++,,-+,,00/.0...-0./.00//..00./.,./-.-,--,-/-,-..---......0/2// @28 ATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAAGGCTAATCGACCTCTGGCAACCACTTTTCCATGACAGGAGTTGAAATATGGCATTCAGTAATCCTTCGAGATGGTGCCGCAGGGAGCGTTTTTACATATGCGCAATGCGCAGGCAATTCATCTGTGGCCACCGCAACAATGCGTCTTACCGGCAATAGTAGATGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTACTCTGACACGACTGAATTTGAACCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGCACAGCCGGCATCTGGATGGCAGAAAACTGTCAGAGATTAATAGCTCGCCGCCGGAGCGTGGGCGCATTACGTTGAGTTAACCGGAGAGGTTGATTCGCCATCTTACTGGCCGCGCGGTGGGTTGTCTCAGGGACTAGCGCAATAAGGTTACGC + /..--.../00.--,,-...-../0..-,--.---,,,,--.---,--./,-/,-..-----+,+,.,-,---,,-++.,----,,0,-.---.,--,-,,,.,,.--+/.,+.-,,--+-...-+,,-,,.,,,,*,,,,++++,*++*++,-*+**-********+++,.+*,,++++,+***++,++,++,*+,,,,+*.*,,+-1+*++-,,,/-,0+++,+-+,-.+-,-----,---,-.....-..-//0-.--..--./0/./0,,//-/00----.-.-,--,--///--.-/-.-..,,....0,/-,--,...,/,,-.-,-,--,,---,,/,.......,,.+,,+,-+.++,+,.,,,,.+,+,,-.,,+-,-.,,.,,-,,-,+++**++*++,--,,+-+-++,,+*,,+*,*++++,-,,,+-,+**,++*+*,+,,,,,,,,,,,+++*++****-+- @29 
ATGATGCAGATTTGTTGGCGCGTGGCTTGATGCAGGGGCCGTAATGGCGGGACGGTTTCCATGACAGGGGAGTGAATATGGCATTCAGTAATCCCTTTCGATGACCATCCGCACCGGGAGCGTTTTACATATTGCGCAATGCTAAGTCGAGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGCAGGCTGGGACATTGTGTGTCATAGCCGCAGTTACAGGCGTCCTTTCAGCAGTGGCGCTGGAAGCCACTGGCGTACTCTGACACCGACGAATTTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCCTACGCACAGCCCGGCATCTGGTTGATGGGCAGAAAAACGGTACATAGAATTTACCCTCCCGCCTGGAGCGTGGGCGCGTACGTTGAGTTAACCGGAGAGGTTTTGATTTCGCCATTACTGGCCCGCGCGGTATTGCCGGACTAGCGCAAGCAGATACGC + ++-++,,,-)/.++,+*+++++-+-,++,+-,,+-,*+-*.+,/--0+.+,+-,--,,+,,+.,,+,,,,--,/--...-/..-..,,/-.-.0..,.+-+-,--+.,,.,--.,,./..23---,---..0-.,,,,.,,,+,+++,**,-*++++,,+,++,+,,,.++***-,+-/,,,**+**+*+**+-*+**++,-,-.,,,,,,+,/,.+,-,++,--.,4+-,++,,++,-,-----.---,..-.-/-.-/..----/..--//--//0---,./.,.-00//0./-../-1.-./101.--/.----/.-.-0---+-,-+,00-,./././-0/-.-..-.-,/.---,,,,...0/-,,+--,,,/1/....-...-0..,//---.2/-,.1-0..,-/--------,,/2-..,/,/--,1-.-.,,-.-/-.-..,-,,-,,,++++,++-,++-,,,-,1++-,++-+,,--, @30 ATGCAGGGGCTAATCGACCTCTGGCAACCACTTTTCCATGACAAGGATTGAATATGCATTCAGTAATCCCTTCCATGATCCGCAGGGAGCGTTTTACATATTGCGCAATGCGCAGGGAAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGGGCAGGCTGGGACATGTGTGTCAGCCGCATTGGCAGGCGTCCTTCAGCAGTGGCTGGAAGCCCACTGGCGTACTCTGACACGACGAATTTTACCCAGTCAGTTGCAGGAGGCACACAATGAGCCAGCATTTTACCTTTGGTGCCGCACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGAGCCTTGGCGCATTACGTTGAGTTAACCGGAGAGTAGGTTGATTCACCCATTATGGCCCGCGCGTGTGTCGTTTCGCCGGACTAGCGCAAGCGAGCAGATACGC + ,,,+,,-,.,,-,,-,1---..-+-,-./1...-,,--,.-./----.--../..0////./.10-/--.-//,-/....-.-,,-.,/1.1/.-..2//10.///-...1/.-,,.../.21.00-./----...-.0//13/---..-,--,-...//-0-,,0.-/.---,,/--.,---,-,----2.--,.----.0.-//--,-,,-,,.--,--,--..0,/,.-...-..0//000---..0.,0-.-.-,.-----/0/-///0//-//0/001/0-../..,/,.-.+..----/-.-1.0....//2../-.0.020---/,,-----.,-----/,-----.-,.,,,-.----0.--.0,,,-,,,/,,+--++,---/,,,,,+--./-+/,,-,...-./---/-,0.,+.,,,,--,,+,.,.,,-.-,,----.-/,. 
@31 AGCCAGCATTTACCTTTGGTATGCTTCTCGGCACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAAGAATTACCCTCCGCCTGGAAGATGTGGGATTACCAGTTGAGTTAACCGGAGAGGTTGATTCGCCCGGTCAGCCTGGCGGGGGGTTAGTTGCCGGACTAGCGCAGGCATCAGGCTG + ,,,+++-./-.,-,..,,,,+.,--*-,+++,+,,+++,++.,,,,++++**++**++-,*,+++))*+)**,*+*++.+-+,.++-.-+--++++++***++,*-+*,)**+)))+,*,+.,.--,,++*++****,+*,**,*)*++*,-**+*++*+*,+**+,+,,++,**))))(*, @32 GTGATGCGCTTTGTTGAGCGGTGGCTTGATGCACCAGGGGCTAATCGAGACCTCTGGCACCACTTTCCAGTGACAGGAGTGAATATGGCATTTCAGATCATCCCCTTCATGATCCCAAGGGAGCGTTTTACATATTGCGCAATGCCAGGGGCAATTCAGTCTGTGGCCGCAACATGCGTCTTACCGGCAGGCTGGGACATTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAAGTGCTGGAAGCCACTGGCGTACTCTGACACTAGACGAATTTGGGCGTTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGCACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATCTAGCTCCGCCGGGGAGCGTGTGGCAGTATTACGGTCCAATCGGAGAGGTTGATTCTACGTCTATCGGCCCGCGCGGTGGTTGCCGGACTAGCAAGCGAATACGC + )+**++,++*++,*++*,+,*+++**)+**+,+)))-*++*)*++*,*,+++,,-+,**+,,-,-,+***+++,,,,.++.+,,+,++,,+++,+,+-+,++++,+++*+--,,,+,+++---00-.,++,--+,,,,+,,-,.-,,.+,,,.--,-.,...---.-,,,,//.-./,++++,,----,,/-,,,,,.,.-.-,,,,,,,-..,,,,,,-,/+******+++++,++++,+***,-,,-,,,+-+,,+,0,,+,,+,++,+,++,-+,+-,,,,-+,---+,/+,/0,1,,,----.-,-+++,-++*+**+,+++,,++*+++,+,+,+++*,+,+*+.+/,**))(()*()()()()()**)*))*))*+)))+))*))**))+(()(((((((()*(*)))+*)**)****))**)********+*))*))))*))*+***+++++*,**,*+++ @33 ATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGCTAATCGACCTCTGAACATTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGGGAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCTTACCGGCAGGCTGAGACATTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTACTCTGACACCGACGAATTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGCACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGACGTGGCGCATTACGTTGAGTTAAAGCCCAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAATAGCAGATACGC + 
+,-,,,,--,.,,-,,,--/-,-.--.-/-//0--,..,..--///.0..--,0-.,--,,,,+,-.,,,-.+,,,.-/--,,,,-,-,,,,..-.,/-,,,,--.--,---/--,-,,++.-.-.-/+,,-.-/2+--.-.---.-.---/-..--..--,--.-/-..----..-.....-.---...//---/,/--..-,,-,.-,+--,,,-//-,--.....-.--..,./..-..../--.----,-..-,---/---,,.,..-..-,---/--.,,-/.0---//--,-.----..---.-...---./////..1//./0../-.//./3/32./-....02/...////100/0/00222/102210/0110/0////./.,../1/0/.../...12/010/3/122/..////.-.,..,-----./-..//../.-0..//0/100 @34 ATGATGCGCTTTGTTGGCGCGGGCTTGATGCAGGGTAATCGACCTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCGCAGGGAGCGTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGCAGGCTGGGACATTGTGTGTCAGCCGCAGTCACAGCGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTACTCTGACACCGACGAAGTTTTTGATGAGTTGCAGGAGGGCACAATGAGAGCCAGCATTTACCTTTTGGTCCCGCACAGCCCGGCAGCCGGATACAGAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGTTGGGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACCCAAGCAGATACGC + -.--.....--,,++++/,,,,.--,-++-,,++,,,,,-,/-/-/.---/,,-.----*++,+,,,.--+,.-,,-,,---.--,-.01-./00-/-,-.,--.-.--,-,-.1..--..300/.--.0./.....0//--./0./,,..---,,-,,,0,,.---0,.-,-,++,,,/-,---,,+--,,,,+++.***+**+.-+++,+.++,--++*+-,,++*-,+,1,+,,+**++*+++,-++,++.+++-+--,,,,,,-,,,,,+**,+,+,/+,,,+,**,+++,-+++,***+***-*+,))*+*+*)*)))*),+++,+,+++,*)****))))(+(**+))))(()*-*+*)*+****+***+**+-****)***++)+*****++,,++,,+,,..-,,+++++,+*++++-,,,-++,+,,,+,-+,+,,,,,.,,-,,,,.,,,++,+ @35 GAGTCGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGACTCACTGGCAACCACTTTTCATGACAGGAGTTGAATATGGCAATTCAGTAATCCCTTCGATGATCCGCAGGAGCGTTTTAAGTTTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGCTGTACCGGCAGGCTGGGTTGTACACGCGGATGTTGCATACAGGCGTCCTGCCAGCAGTGCTGGAAGCCCACTGGCGATACTGACACCGACGAATTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTACCTTTGGTCCTGGGACAGCCCGGCATCTCAGGATGGCAGAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGAATCATGAGTTAACCGGAGAGGTTGATTTCACCTTATACTGGCCCGCGCGTGGTTGCCGGACTAGCGCATAGCAATACGC + 
--.,,/-.---...0--.-----./.-./-0..--.-/.-//./--0/-../..--0.000../..--../././.--......./.....-.--/../0./...-.-..--10.-.-..../.-,..0.--./....0/....-00..0////..0.//./.--,-..//./.,/---/...-/..//.---0.1-,00.//-----,.-..-,-./-0-,+-,..-.../--/.,,-/-./-..-.../----/-../-.--,-,..,,,,.,.--/-.-,-/+,----...,.,-,/-,,+,,,--,,+,+,+,++,-,----/---.--,-,,.------,-.-.-,,-+,-.---.,0-,,---...-.-/01--/..+..-/--.-/..-.,.,.0/----.-,-.---.-//..-.....-----,//--,---,/.---,.-0./--..- @36 TCACTCCCGGCCGGCGCGGTGGCTTGATGCAGGGGCTAATCGACCTCTGGCAACCACTTTTCCATGACAGGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGGGAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGCATAGGCTGGGACATTGTGTCAGCCGCAGTCACAGGCGTTATGCCAGCAGTGGCTGGAAGCCGACGGCGTACTCTGACACCGACGAATTTACCCAGTTGCAGGAGGCACAATGAGCAGCATTTACCTTTGGTCCCGCACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCTCCGCCTGGAGCGCATTACGTTGAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCAAGCAGATACGC + ++*)))**+++**+++-,+,+-,,*+++*,++*+,+*+++--,-,/,,++++.+-.----.-,.-100..---,-....-../-0.-.////////0./-/11001/0000/0/1610/..11211/10./100221010/01//1/021002/0/../...--/0.0/1-00-/.--.,.-.-/--,-,,-,-...--,,+,,,,.-,,-+.,---.-.-,---,//.-/0/-.--.-/00./0.//.-..1.3.//0/..10131131..-01100/---/1.-../12///.0..//..0.//20...----0--..--,-,..-.,./.-.-.-/,,/..0.-,,,,/-/..--,,,-././......--...//.-.----/-,-.-.-.-,0--./-/-,-/--////..-----,-+-.-.,-..,,//./..0.././-..0/. 
@37 ATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGCTAATCGACCTCTGGCAACCTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGGGAGCGTTTTACATATTGCGGGACTTCGCGCAGGGGCAATTTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGCAGGCTGGGACATTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTACTCTGACACCGACGAATCAACGAGTTGCAGGAGGCAATGAGCCAGCATTTAGTTCTTGGTCGCCGCACAGCCCGGCATCTGGATGGCAGAAAACTGTCAGAATTACCTCCGCCTGGAGCGTGGGCATTACGTTGAGTTAACCGGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCACATAGCAGATACGC + ,,.,,,,,+,-,,,-+,-,,.,,-,,,,.+,./---.,+,..///./2..--,+,,.-.--./.-./1.0-..-..-./-.-.,,.-,,-.-..--,-,,---,-1/./..0.-/1///--/1//100.-.-.-..0/.2.///..0/0/00/../1//..//0.//0/02/2102..././3/00/00///..000000/...//2/01../.0././---.-/-,---.--..,,,+-,,.,+-,,,--0-/,,.,,,+,,**)+)*))+*)**++*,+*****+)*-*+,***+++-.-++)*+*++*)*****,+++,----,,--,,,./-/.,-++,+,/1//.--,,,/---,.--.//./00./////../././0./.---//...00.--,.-/---0.-../..-/-.../0//..,..-.-/.-,,/.---./..-,---.,,-.0. @38 ATGATGCGCTTTGTTGGCGTGGGCTTGATATGCAGGGGTAATCGACCTCTGGCAACCACTTTCCATAGTGACAGGAGTTGAATATGAGAATTCAAGTAATCCCTTCGATGATCCGCAGGGAGCGTTTACATATTGCGCTGAATGCCCGGGAATTGGGATAGCCTAAATGCGTCTTACCGGCAGGCTGGGACATTGTGTGTCAAGCCGGGATTACATCGTCCTGCACAGCAGGTGCTGGAAAGCCCACTGGCGTACTCTGCTAACCATGACGAATTTTACCCAGTTGCAGGAGGCACAATGAGAGCCAGCATTTACCTTTGGTCGCCGCACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAAGCGTCTCCGCCTGGAGACGCTTAAGGGTCCATTTGAGTTAACCGGAGAGGTTTGATTCGCCATTACTGGCCCGCGCGTGGTTGCCGGACTAGGCGCAAGCATACC + -./-.....-./,--,,-/,-,,,-,-,-+,..-,,-,,+.,.,,-,---.,,,.-./-,-++++,+-.,,.,,-,-.++,--,--,-,,/,,+,--,,,,--,,/,,-,++*--,,--,+,.,,,,*++,,,,-,++.,,,/-,,,,-,-,.,-+++-++++++---,.+-++*+*+,,./-/-,,+++-,,-+,,+,++*,-+*++-,+*+*+++,*+*++*,,,),+**,,**-**+**-**+*,*+,+++**-+*,)),*++-******++*****+-,,,*)****),,-+*,+/****,++++,*++,+,++++++++,+,+++,+,,,++--,+,-,,,-,++**,.--,,.+++**+++*++,++,,+,.,+,+..,++,+++-+-+--*+.++++*+++***+*,,++*-++,,*++,+++*++++**+++,,,,+,*+,,+*,,++,,,-,,,,-,-++, @39 
ATGATGCGCTTTGTTGGCGCGGTGGATGCAGGGCTAATCGACCTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGCGGAGCGTTTTACATTGCACAATGCGCACAGGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACAGTGCGCAGGCTGGACATTGTGTGTCAGCCGCATCACAGGCGTGCTCTGCCAGCAGTGGCTGGAAAGCCCACTGGCGTACTCTGACACCGACGAATTTACCCAGTTTGCACAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGCAGCAAAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTGCCTTCAGGCGCATTACGTTGAGTTAACCGGAGAGGTTGATTTGCCATTACTGGCCCGCGTGGTTGCCGGACTAGCGCAGCAGATACGC + ----...----.,.,,+,++-*+++++/-,--..,--.....-/.0/-,+.,-/-..--.,,,,,-,,-......./....0--0/--../--.-,,,--.//...-.-.-/--./.-.--,//0..,,,,0/---,,/----,---.-/.-..0.,.,--,/,+,-......-,,,,,----,/-./-.,/,,.////.-,,,,-,,,-.--,-/.-,1,-+,++,,,-,-/1--0/,,-++,,,0++,+.+,,,,++,-,,,.--,+,,,-,,,-++,-.,.,+-**-++--+++++-,.0-,,,.,-.../-/.0.---,,,----,/,,/,,----.1/.-.0/./../.--0/130201.-.../----....--....0../0///023/...1..0001//0//.3./..-/,--//.0./11.-./////.//4/..../.../..-,.--,--/++ @40 ATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGACCTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCGTGCTTGTGTACGCAGGGGAGCGTTTTACATATTGCCAATGCGCACTAGGGGCAATTCGGTCTTGTGGCCGCAACAATGCGTCTTACCGCGCAGGCTGGGACATTGTGTCAGCCTCAAGAGCAGCCGGAGTCGGCCAGCAGTGGCTGGAAGCCCACTGGCGTATACTCTGACACCGACGAATTTTACCCAGTTGCAGGAAGAAATGAGCCAGCATTTACCTTTAGTCGCCGCACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGGCGCATTACGTTGAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGCAAAGCTAGCGCAAGCATACGC + ./01-10//0///1..///1/.//0//.2//001.//10.1100000003/33/.2/12101/.///.0/0//0500///00/1///0.12//0///00/000,/-/-/-----.---/-./0/--,,.-/.1..-,././.-,,.,-,-,.,---,,-,//+-,,+,,++,+++./-...--+-+,+++*+,,--,,++,/-.--,,+++--,++-,.,----///.,..,,.,+-,---./-.-,,,+-..,-,---,.,,./0..-.1//1.--.-/.//.-,/,--/.--/-.,-,,/,../---.--.//./-..1-,+.,,,-/--,----,-,-/-.-..--.-0-----012//./-,/,.-,,.,/.-/./-..--../1--/0-/0.-./1/0....-0//.--.--.,/-0.-.1.-.../1-//-.--...-.,-0/.-0.,0-,/...--013,-.-.--.3// @41 
ATGAATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGACCTCTGTCCCAACCACTTTCCGCTGACAGGAGTTGTAGAGTATATGGCATTCAGTAATCCCTTCCATAAGGTGATCCCGCAGGGAGCGTTTTTAGATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCATAACAATGCGTCTTACCGGCAGGCTGGGACATTGTGTGTCAGCCGCAGTCACATACCAGGCGTCCTGCCAGACAGCAGTAGCTGGAAGCCCACTGGCGCGGAGTCACTCTGACACCGAGCAGGACGAATATTTTACCCAGTTGCAGGAGGCACAATGAGCCGGCATTTACTTTTGGTCGCCGCACAGCCCGGCATCTGGATGGCAGAAAACTGTCAGAATTACCCTCCGCACTCGAGCGTGGGCGCATGTCAATCGTTGAGTTAACCGGAGAGGTTGATTCGGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGATACGC + .-/-,-...../---,,.,-.,.-/-,,,,--0/+.--.,,..//.1,0-0//,,,--..1/.--,,-/,,-,,.--++.,,,-.-,,.-*,,,2+,.-++,*,-,,,-,-*-*+-+,,++.,+-----.--06/--,,+,,,,-.,,-+0,../,-,.**,++,-,./-2.,-0,,,0--/+.,--.--/..-0-.-,,-.---/,/,,.---./-/,+.,+.,,,-0--,/1*/-,*,,,.,*+*++,++,+,++,-+++*+,+*++++*++++.-,++**))**-***,)+*,**,,+****)+*-).+*+-+*++++++.++0*++++++,---.-+,,.+,.,,/*+,/--,,,,,--,-,,.+.-,+-,,,,/--++,-,+++-+,,,,,+*++-,-,++-,,,,--,++,++,-,,.*,,-,,-+,.-++++,,,++-,*+*+,+,+,,+++*+,+,-**,,,+-.,+,.+,-,-,++-,+-,,+++,,++++,,.,---.,-,,-,, @42 ATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGACCTCTGGCAACCACTTTTCATGACAAGGGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGGAGCGTTTTACATATTGCGCGCACTTCGCGCAGGGGGCATTCAGTCTTGTGGCCGCAACAATGCGTCTTACCGGCAGGCTGGGACATTGTGTGTCAGCCGCAGTCCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTACTCTGACACCGACGAATTTACCCAGTTGCAGGAGGCAATGAACCGAGCATTTACCTTTAGTCGCCGAAGACTCATGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCTCCGCCTGGTGGGAGTGGCGCATTACGTGTTGAGTTGACAGCCGGAGAGTATTGATTCGCCATTAGTTACTGGCCCGCGCGGTGGTTCCGGACTAGCAAGCAGATACGC + 
..0.-./0-../..---..0.-....--0./00---./.-/0/0100/3//1..-0.02000/./..1/1./1///000/00/0/2/0111120/0/0/00..0././0//.1...//2..2...-,.0/.2.0---.-/-,--.-/..-,.,----....,-,..----,,-+,,//./0,-,+--,,-,/.-.0,0,,-112/1/./.-../02/10./0./201/2..////////10/202.//..00/00.-0-0.0./..3./01-1.-..-./--.,-,-.-//--+,+-+++++,1.*,+-+,-0/--+,,0+++**+++*-+++++++++**++++-,-..,,,,++,-..,+--+,+,.--,+++,+,-++++.+-.+,/,0,,./.--.+.,+,,,,.,0,,--,/.,,-,+,+,+.,-,.,,-..-,,,+,,---.++,,/,,+,/,,,.,,,-.,++,-+,.-.-.,--+ @43 ATGATGCGGGTTTGTTGGCGCGGGCTTGATGCAGGGGGCTAACCCACTCTGGCAACCACTGTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGGGAGCGTTTTAGTGCGCAATGCGCAAGAAATTCAGTCTGTAAGGGACCAATGCGTCTTACCGGCGGACAGGCCCTCTTGTCAGCCGCAGTCACAGGCGTCCTGCCGGAGCAGTGGCTGGAAGCCCACTGGCGTACTCTGACACCGACGAATTTTACCCAGTTGCAAGGAGGCACAATGAGCCCTCAGCACCATTTACCTTTGGTCGCCGCACAGCCCGGCATCTGGCTTGGCAGAAAAACTGTCGAATATTACCCTCCGCCTGGAGCGTGGCGCATTACGTTGGCAGTGCTGGAGAGGTTGATTCGAGGTTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCATACGC + ,,+++,,,,,-.,,,++.,---,-,.--.,--,,.--,,,,.,,....//../-..1...,/--,.0/-0///..--/,,-/.-./0.--.2,.0../0/--/,,-,,,-,+,-------/-++***+,+*+.+,-,*+*****,**),)(*()()())*)*)+)*+,***)())(+*++*)****+)+)***),)))*)+*,*+*.**++0+,,**.,****++-,+.-+++,++,,+++-,-.----,--,-,-.,,,,-,/--//-+-+-.--.+,+/.,.,-/--/0//0-..,.,-.-0--./.0,-.00/.,,,,--,/--/..1...20../0/-10---,0-////1--.-,--//-./-.....-0../..-./.-,--.0-,--0.,+,,,,,-,+-+,+,//--/,--,+-,,,--+,-++.+,,,-/,,-,,-,,++,,-,,,,---,---,,,.0, @44 ATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCAATCGACCTGGCAACCACTTTTCCATGACATAGGGTCGAATATGGCATTCAGTAATCCCTTCAGTCGATCCGCAGGGAGCGTTTACATATTGCGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACTACCGGCAGGCTGAGAAGATTGTGTCAGCCGCAGTCACAGGCGTCCTTGCCAGCAGTGGCTGGAAGCCCACTGGCGTACTCTGACACCGACGAATTTACCCAGTTTCAGGAGGCAATGATAGGGATCTACCTTTGGTCGCCGCACAGCCCGGCATCTGGATGGCAGAAAACTGTCAGAATTATGCGCCTGGCAGTGGGCGCATTACGTTGAGTTAACCGGAGAGGTTGATTCGCCAGGGTACTGGCCCGCGCGGTGGTTGCCGGACTAGCAGCAAGCATACGC + 
//1/.1/.///0/100/001/.//0...1/./0./-.0//2//001.2/.--0-//../---.--,,-.,,--,,+.--/.--...-./-,-..----,,,,,-,,,,,+/,.--.-,-/,.,+.-/../.,-+.+,,,+,-,,++++*+-+,,/,-,,,-,,,++++.+---/0---*++-*+++++,-,--+,+++,,.--.-,,,+--,,,,,,,-0--,1,,,,,,+,--+..--.0-/,,.,-/--0----./,.-,,/,,--,-,--/002/-,,,.,.----/-.-,,--.-.--+-.,-../--.1..----../.-.-/.0....00/0/././..//..211/0///0/110/./0..1/0////0/2../.00/..00//../..../-.-...0/0.2/..../.-.-,,--.0-.--.---.,-/--,//-,-/--,--,.,,,-.,+,,.+ @45 ATGATGCGGCTTTGTTGGCGCGGTGAGGTTATTAGCAGGGGCTAATCGACCTTAGCAACCACTTTCTAAGTGACAAGGAGTTGAATATGGCATTCAGTAATCCCTTCGGATGATCGCAGGGAGCGTTTTACATACTGGTCGCAATATTGCGCGGAAGTGATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGCAGGCTGGGACATGTGTGTCAGCCGCAGTCGCGCAGGCGTCCGTGCCAGCAGTGGCTGGAAGCCACTGGCGTACTCTGACACCGACGAGAATTTTGACCCAGTGCAAGGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGCACAGCCCGCAGGCATCTGGATGGGCAGAAAAACTGTCGAAGATTACCCTTCCGCCCGCTGAGGGTGCGCATTACGTTGAGTTAACCCGGAGAGGTTGATTCGCCATTACTGGCCCGCGGTGGTTATGCCGGACTAGCGCAAGCAGATACGC + ,,,***),..*,**)))**)+***+(*)*)))*())+***+*)*+,,++*,****+*,+-,+*,)))+**-*+**,+,++,**+,---,,-,+-,,+.-+,++++-*,***)))*++,+,++,--*+***+-*-+/+,*)*,*+**,*+))*)**())()*****,**,*)**)))*+/-+./,,+*)*))+*,,,+-*)++*.*++++++++.++++,-++,+,,,,,-/-+*,*,**+-,0-,+,+,,++*++,++-+,-,,,-,-,-+,-,.+-,--.,,.+./,+,.,-,,,.+,+--.--+,,+-.-,-++-,--2.-/++,,,.++-,*,,+.,-,-,,,--+++,,++,+,*++,*++++.,,--,+,-**,,*+,,+-,--++,,-*,++++++++,,*,,,+.,-0+++,+.+++*+**+,,.,-,*-**+*++*+*,,*+-,+*++,+*,+*-****-******)+,*+**+,)*+)**** @46 ATGATGCGCTTTGTTGTGGCGCGGTGGCTTGATGCAGGGGGCTAATCGACCTCTGGCGGACAGATTTTCCATGACAGGAGTCGAATATGGCATTCAGTAATCCCTTCGATGATCCTAAGGGGAGCGTTTTACATATTGCGCATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGCAGGCTGGGACATTGTGTATTCCAACACCCACAGGCGTCCTTGCCAGCAGTGGCTGGAAGCCCACATGGCGTACTCTGACACCGGATGACGAATTTTACCCAGTTGCAGGAGGCGCACACGCCAGCATTTACCTTTGGTCGCCGGCACAGCCCGGCATCTGGATGGCAGAGACGGTACAGAATCCCTCCGCCGGGTGAGCGTGGGCGCATACGTTGAGTTAACCGGAGAGGTTGATTCGCCATCTTACTGGCCCCACGCGCGGTGGTTGCCGGACTAGCGCAAGCATCAC + 
-.0--.//,.-.--.-.,.,-.-,---,,,-++02-,,,..,+,.,,-,.,.,,/.,,*,.+,,.,,*,+,,+*+,,+-++*-,+,0+--,,+.,++.,,,,,-+-,,+-.-,,,++,,,,--,17/.-,-0.3/0.0-,/.-..,.--,./-..---,../,,,.--,,,,./01,0.,-,+,,+,-+,/,,-,,,,,.//-,,,+-,,,,+-/-+++,/-.,.,++-,+-*+/,-.++/,++-+,+-0-++-+++-,-+,,,-.++,,-,,-+,,.-,,*,+.,**,+,+-,,+++,+++++)++,*,++**++,,,+++/-+++,***,****,,,,-,,+-,-+-,.,---.0-.,+,,,,,//,-/-,/-,,--.--,,/./.01/..-.001./021..--0...-1../..0//.0.1.2/-/-..--.....//-011/0.--,/----....-0...000-.-/.,,,-. @47 ATGATGCGCTTTGTTGTGGGGTGGATGCAGGGCTAATCGACCTCTGGCAACCACTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATTGCAGGGAGCGTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCCTTGGCCGCAACACCGTGGCGTCTTGTAAGCAGGCTGGGACATTGTGTGTCAGCCGCAGTCCACAAGGCGTCCTGCAGCAGTGGCTGGGGAAGCCCACTGGCGTACTCTGACACCGGCCCGAAATTTTACCCAGTTGCAGGGCACAATCGGAGCATTTACCTTTGGTCGCCCGCACAGCCCGGCATCTGGATGGCAGAAAAACTGTCACGATTACCCTCCGCTGAGCGTGGCGCATTACGTTGAGTTAACCGGAGAGGTTTATTCACCGCTTACTGGCCCGCGCGGTGGTTGCATTAGGACCAGCGCAAGGGCAGATACGC + /---./../--+*,+-++,-,-,+,+0,,.,,.,.../,-,--,-,,,,,-.-,,,+*+++,,-,-,,..,,---.-.0/-....0-./--/-.,,.//----,----.-/./-/--/1/0/0.-1///0-.----.-.,/.01.,/.,,-/-,-.-./0--../,../.-0,,,.,.....---,,1,,--,,/,++++,,,.,-,,*++,,,.2,+,*++,,,,.,-..----++-++-,+,/+.++,++--+-*+,/-,,,,-++,,-..-,+-+--.,,,+,,-,,,,-,++,+*+,---.+,-/-.++++*+++*,/,+,,-+,,+/+,,+++**,,,++-,,,.,.,++*,,++,*+++**+++,,++,,+,+*+.,+,.,/,+-,,,+++*+**++++++**)-,++*,,++,.+-+,.+,--..---/+-+.++.+++,,-,,,-,..-,-./.-.,.... 
@48 ATGATGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGACCTCTGAACCACTTTTCCCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCCGCGCAGCGTCAGTTTACTCACATATTGCGCAATGCGCAGGGCAATTCGGTCAGTAGCCGCAACAATGCGTCTTACCGGCAGCTCGTGTGTCATAACCACCCGGGTATCTTCACGCTAAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTACTCTGACACCGACGAATTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCGGATTTACCTTGGTCGCCGGCACAGCCCGGCATCTGGATGGCAGAAAAACTTCTCAGAATTACCCTCCGCCTGGAGCGTGGCATTACCGTTGAGTTGACCGGAGAGTGTATCATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACGACTAGCGCAAGCAGATACC + /.20///../0..-..-/.,..--.-1---.--,-..-/////...-,--,,--------+++-,,+.,+,---,-,,+-,,,.-,,,,,--+++++,,,.,+,,,,,+-.*.++-.-,*++-+***,*-,-++.+,*)*-++-,+,,,+--,-,,.++++,+)**+***)**.,,-1*,*)*****)+,++*,*,-*-,,**)+++-+*+++*,+-,.,+,,,-,+++,0+-,+,,,,,+--,---,,++---0/-.-.-./0,-/.1----/..-.0-...-/0---1//./..--.2-0...-//0.,-0..0.,.--0-.-.-/,,,--.---,/0-..-,/..-.-/.-/---,/,-.320--.,,-,---+,-++.,.,/,.---../-,/-..--,/2,,,-.,,/-,,-,-++-+-+,*,,+-.-,-.-,.,-0/,-,-.-,,+,/++,+*,+++***,,**-/**+++*+**+** @49 ATGAACGCTTTGTTGGGGCGCTTAGATGCAGGGGCTAATCGACCTCTGGCAACCAGATTTTCCGATGACAGGAGTTAGAATATGGCATTCAGTAATCCCTTCGAGTATCTGATCGCAGGGAGCGTTTTACAACTTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTGTACCGGCAGGCTGGGACATCTTGTGTGTCAGCCGCAGTTACACATGCGTCCTTGCGCCAGCAGTGGCTGGAAGCCCACTGGGCGTACTCTGACACGAAGCGAATTTTACCCAGTTGCAGGAGGATGGCACAATCAGCCAGCATTTACCTTTGGTCGCCGCACATCACCGGCATCTGGATGGCAGAAAAAACTGTCAGAGAATTACCTAGCCGCCACTGGAGCGTGGGCGCATTACGTTGAGTTAACCCGGAGAGGTTGATTCGCCAGTCACTGGCCCGCGCGGTGGTTGCCGGACTACGCGCAAGCAGATACGC + -+,,,,-,,-+*+**,*+-***)*+)),))*+**))+*,,+.+,+,-+*+**+-)*****)**,**++**++++-+*.+++,,+--,++,++.*++,++++)*,*++,,,,+++,,.---,/5.-,-,--.-,--,+,/,./0,---++---,..0,-10-,/-,-.--.,.0/-.9--/,,.,,,--.0../---+,0-./,,-/-.,/+,,,-,.0+.-,+.-,.,.-,.,+,-..0-,-./,,/.--.--/,-,,,,--,-/,-/,/--,--,.//---,,./30.,,,...-0,-,,-,-++,/,-,..-/.,/,--/-00-/,-,.--,,.-,.,+----.+/.+-,-,--/.--.0...--/./000///,,/,-00/0/--/,-....0..../...00.0--..6.-..0..-.0..-,--,,/.,-./---/-.---/--.-/,-..--+,-,--++,,*-+,+++++,*-,,+,.,,-.3++,-,., @50 
ATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGACCTGAAACCACTTTCTCATTATAAAGAGTTGAATATGGCATTCAGTAATCCTTCTAGATGATCGCAGGGAGCGTTGCCCACTTAGTGCACCGCGCAGCGCAGTTCAGTCCTTACTAAACAATGCTGTTATAGGGCATCACCGGCTGGACATTGTGTGTCAGCCGCAGTCAAGGCGCGTGCCACCATGTAGCTGGAAGCCCACTGGCGGAGTTCTGACAGGACGAATATTTACCCCAGTTGCAGCAGGGGATACCGCCGCCCGGTTTGATTTATTTGTAGCCGCACAGCCCGGCATCTGATGGAAAAACTGTCTCAGTGTAGAAACTAAGGGCAGCCTGCCTGGGTTACTTCGCGTGCTCCCAAGGCGAGGTTTAACCGGTGATGAGGATTGGTTGCCCGACAACGCGCAGCAGATACGC + ...---2.-..,-,,,.,,,,-,,,,,,.,..,,,-,.,,-/-/--,---,-,,...--,,,,,+,+,,,--,,-,,-,,,++-0,++,,+--+,,,,,+,+++,,,+*,++**++,.,+*+**+++**)*+*.*)***,***,++**))+*)*,))+)*))**++,,+,*++*+*+**-,,+.,+-+,,+++-,,,,,,,,++*+,,+-/-,.,..-,,-.,,,,,,,,,,-,,,--+,,,/.,,,++++++-,-,+++++++*++**++.,++)))))*,++)*+++++*++*)*))****+**++*++*+,++**+++*)))*)****+*+***++*))))))))**,,+,,,)**++*+*))*+***)*)))++**,))*,),)))**)**)+*****),)))****))*+**)),*)***+**++***+*+***+****+*+++++*,,*++,++++,-, @51 ATCATGCGGTTGTTGTAGGGTGCAATTAATGCCCGCTAATCGACCTCTGAATACCACTTCTAGTTAACATAGGAGTCAGTAATGTGCATGCAGTAATGCGTCGCTAATGATTAGAGGAGCGTTTTTACATATTGCATGCGCCACAGGGGGATTCAGTCTTGTGGCAAACACCTGCGTTGCCCGGCGATCGGCTCGAACTTGTGTAGTTTGTCTCACCGTACAGGCGGTTTCCCTGCCAGCAGTGGCTGGAAGGCTAATGGCGTACTCTGACTAGACAGCGATTTTACCCCATTGCCAGGAGCAAATGAGCAGGATTACCTTTATTGCCGGAACTAAGCCCGGGAGTACATGAATAAGAAAAACTGTCAGAAGTGGGTAGCGCCTGGGGTACTTGGGCGTGGCTCATTCAGTCTATCACGCGGGTCATTACGCCCGTACTCATGGCGCGTGAGTTGCACTAGCAGAGCAGCGTAGA + -,-.,/--.-.-,,++,+++.,+-,-+,+.,-+,+-,+,,,,,,-,--/--,-.,+..,+,*+,++,--,,,,,--,,,+,-,,,+.-,/-,-.----.-.,,,+-----,,,.,,..,---.-,,+,----...-.0.-.+/.,,,-,,,,,.,-,,+-+++++*+,-,++-+,,+-+,,++./+-+)*++*+++,+,+-,+-**-*+++*++.,,.-,-,,+--++.,,-+,,,,,,-/,.,.--.,-.,-.+.-.-/.0-//--0.0--.-.0..00,/010,-/0/-----.+-/----,./-.--,-,.-.-.,.,.,.,/-,,++--+,,,,-,,+-**-,..,-++-+**-,,+--+*,,*-*,++**,*+*+,,+-+,++-++++-,.,.+-++,-+,+.+++-*+++,+.--++,-,-,,.--,,.,.,+--,,,.,-,.,,--+,--,+,,-/-.,../-/.,/- @52 
ATGTAATGCGCTTTGTTGGCGCGGTGGGATCTGCAGGGCTAATGGTACACCTCTGGCAACCACTTTCCATGACAGGAGTTGAATATGGCATTCGCTAGTAATCCCTTCGATGATCCGCAAGGGAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCCGCAACAATGCGTCTTACCGGCAGGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGGCGTACTCTGACACCGACACGAATTTTACCCAGTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGCACAGCCCGGCATCTGGATGGCAGAAAAACTGTCTAGAATTACCCTCCGCCTGAGCGTGGCGCATTACGTTGAGCGAGTTAACCGGAGAGGTTGATTCGCCATATTACTGAGGGATGGTTAGCAAGCGACCCTGCGGAGTTGCCGTTTTACACGCGCAACCAAGGATTTCGG + ,-++**+,-..,.**+*+,,,-++*+,++,-++++,++++,,,,,++-++-,,,++,+++/-+++**-+*,,,-,/.,.,++,,,+-+--,,+,/+,,--,..,-,,+--/,-,-,.-,-..--,1,,+++--1----,++.,--.,-,-,-,,,,--.-,/-,++*+++**+,*+0.,-/+-++****,+,-,,,++,**-*++++,++********/+*,.+*++/+)***+,**+*-++,,*+*))))+**,*,+**++++++++,,,,+***-*+*,,--,-*,*+,,+*,+++-+,-,,1,--+,,,,-/0--.+--.,,,,-.,-./,-...-0..-0//././/..../.-/.110..-,+,+-1//.,...---.-../,-/-,+,,,,,+++/++****,***+)))))******)+),)((((''('''')&'&'&'''&(&&&&(&''&&&&&&&'(&&&&&''&&%%%$#$#$$%$$$%$$$&$$%$$%$&&#$ @53 ATGATGCGAGGTCGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGACCTCTGGCAACCACTTTTCCATGACAGGAGTCGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGGGAGCGTTTACATATCCTAATGCGCAGAGCCAATTCAGTCTGTGGCCGCAACAATGCGTATGTACCGGCAGGCTGGGACATTGTGTGTCAGCCGCATTCAAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCATGATCGCTATATCGAGACACCGACGAATTTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGCACAGCCCGGCAACCCGGATGGCAGAAAAACTGTCAGCAACGCCTCCGCCTGGAGCGTGGCGCCGATGAGTTAACCGGAGAGTGATTTCGCCATTACTGGCCCGCCCCGCGGTGGTTGCCGGACTAGCAAGCAGATACGC + -.0..//--/,./0/./-..//-///.--202/1/..01/.01121011300400120434141102011131323/00201110/01100120200/12001././11///./..-.--240.-,....-.,-,--0/-.----+,,....,.,..,-,,--,,++,.--/2,,+/+,+,,-,.--..,,++-.-...----,-,/.1-/--,/+.--/-,+-++,,-,-/,,0---,+-...,---//.-.,,-----,,+,-/--,,-+------.,-,-..-/.,..-..,-,,-.-.,+.--,.../.,--0-,-,---..---...//....-.-./,--..----,/.12././,,/-,..0/--../0..././//..------+-.,..--....--,+,,.-.,.,--.,.,,,-2,,+,.++++**+**,*++0-,+++,+*)***++*.*)*,++ @54 
GCCGCACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGTTGAGTTAACCGGAGAGGTTGATTCGCCATTATACCGCCCGCGCGGTGGTTGCCGACGCCCCGGACCGGCATACGC + //./././//...//1//..-..../-/--1//00//-...0/..----/././../--./.--.//...-../-,/,..-..-----/,-,-/,-.,--,./.,--..-.--.,-.+,1-,-,++-,,-.,+,++-+,+,,,,,,-,, spoa-3.0.1/test/spoa_test.cpp000066400000000000000000000751131351220300700161410ustar00rootroot00000000000000/*! * @file spoa_test.cpp * * @brief Spoa unit test source file */ #include "spoa_test_config.h" #include "sequence.hpp" #include "spoa/spoa.hpp" #include "bioparser/bioparser.hpp" #include "gtest/gtest.h" class SpoaAlignmentTest: public ::testing::Test { public: void SetUp(const std::string& file_name, spoa::AlignmentType alignment_type, std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q, std::int8_t c) { parser = bioparser::createParser( file_name); alignment_engine = spoa::createAlignmentEngine(alignment_type, m, n, g, e, q, c); graph = spoa::createGraph(); } void TearDown() {} void initialize() { parser->parse(sequences, -1); std::size_t max_sequence_size = 0; for (const auto& it: sequences) { max_sequence_size = std::max(max_sequence_size, it->data().size()); } alignment_engine->prealloc(max_sequence_size, 4); } void construct_partial_order_graph(bool use_qualities) { for (const auto& it: sequences) { auto alignment = alignment_engine->align(it->data(), graph); if (use_qualities) { graph->add_alignment(alignment, it->data(), it->quality()); } else { graph->add_alignment(alignment, it->data()); } } } std::vector> sequences; std::unique_ptr> parser; std::unique_ptr alignment_engine; std::unique_ptr graph; }; TEST(SpoaTest, AlignmentTypeError) { try { auto alignment_engine = spoa::createAlignmentEngine( static_cast(4), 1, -1, -1); } catch(std::invalid_argument& exception) { EXPECT_STREQ(exception.what(), "[spoa::createAlignmentEngine] error: " "invalid alignment type!"); } } TEST(SpoaTest, EmptyInputError) { auto alignment_engine = 
spoa::createAlignmentEngine( static_cast(0), 1, -1, -1); auto graph = spoa::createGraph(); auto alignment = alignment_engine->align("", graph); EXPECT_TRUE(alignment.empty()); } TEST_F(SpoaAlignmentTest, GraphClear) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kSW, 5, -4, -8, -8, -8, -8); initialize(); construct_partial_order_graph(false); auto consensus = graph->generate_consensus(); graph->clear(); construct_partial_order_graph(false); auto test = graph->generate_consensus(); EXPECT_TRUE(consensus.compare(test) == 0); } TEST_F(SpoaAlignmentTest, LocalConsensus) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kSW, 5, -4, -8, -8, -8, -8); initialize(); construct_partial_order_graph(false); auto consensus = graph->generate_consensus(); std::string valid_result = "AATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGA" "CCTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGG" "GAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGC" "AGGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGT" "ACTCTGACACCGACGAATTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGC" "ACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGTT" "GAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGATA" "CGCTTTACACGCGCAACCAAGGATTTCGG"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } TEST_F(SpoaAlignmentTest, LocalAffineConsensus) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kSW, 5, -4, -8, -6, -8 ,-6); initialize(); construct_partial_order_graph(false); auto consensus = graph->generate_consensus(); std::string valid_result = "AATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGA" "CCTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGG" "GAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGC" "AGGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGT" "ACTCTGACACCGACGAATTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGC" 
"ACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGTT" "GAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGATA" "CGCTTTACACGCGCAACCAAGGATTTCGG"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } TEST_F(SpoaAlignmentTest, LocalConvexConsensus) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kSW, 5, -4, -8, -6, -10, -2); initialize(); construct_partial_order_graph(false); auto consensus = graph->generate_consensus(); std::string valid_result = "AATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGA" "CCTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGG" "GAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGC" "AGGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGT" "ACTCTGACACCGACGAATTTTACCCAGTTGCAGGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCG" "CACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGT" "TGAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAAGCAGA" "TACGCTG"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } TEST_F(SpoaAlignmentTest, LocalConsensusWithQualities) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kSW, 5, -4, -8, -8, -8, -8); initialize(); construct_partial_order_graph(true); auto consensus = graph->generate_consensus(); std::string valid_result = "AATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGA" "CCTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGG" "GAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGC" "AGGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGT" "ACTCTGACACCGACGAATTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGC" "ACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGTT" "GAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGATA" "CGCTTTACACGCGCAACCAAGGATTTCGG"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } TEST_F(SpoaAlignmentTest, LocalAffineConsensusWithQualities) { SetUp(spoa_test_data_path + 
"sample.fastq", spoa::AlignmentType::kSW, 5, -4, -8, -6, -8, -6); initialize(); construct_partial_order_graph(true); auto consensus = graph->generate_consensus(); std::string valid_result = "AATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGA" "CCTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGG" "GAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGC" "AGGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGT" "ACTCTGACACCGACGAATTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGC" "ACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGTT" "GAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGATA" "CGCTTTACACGCGCAACCAAGGATTTCGG"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } TEST_F(SpoaAlignmentTest, LocalConvexConsensusWithQualities) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kSW, 5, -4, -8, -6, -10, -2); initialize(); construct_partial_order_graph(true); auto consensus = graph->generate_consensus(); std::string valid_result = "AATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGA" "CCTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGG" "GAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGC" "AGGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGT" "ACTCTGACACCGACGAATTTTACCCAGTTGCAGGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCG" "CACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGT" "TGAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGAT" "ACGCTG"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } TEST_F(SpoaAlignmentTest, GlobalConsensus) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kNW, 5, -4, -8, -8, -8, -8); initialize(); construct_partial_order_graph(false); auto consensus = graph->generate_consensus(); std::string valid_result = "ATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGAC" "CTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGGG" 
"AGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGCA" "GGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTA" "CTCTGACACCGACGAATTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGCA" "CAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGTTG" "AGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGATAC" "GC"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } TEST_F(SpoaAlignmentTest, GlobalAffineConsensus) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kNW, 5, -4, -8, -6, -8, -6); initialize(); construct_partial_order_graph(false); auto consensus = graph->generate_consensus(); std::string valid_result = "ATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGAC" "CTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGGG" "AGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGCA" "GGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTA" "CTCTGACACCGACGAATTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGCA" "CAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGTTG" "AGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGATAC" "GC"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } TEST_F(SpoaAlignmentTest, GlobalConvexConsensus) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kNW, 5, -4, -8, -6, -10, -2); initialize(); construct_partial_order_graph(false); auto consensus = graph->generate_consensus(); std::string valid_result = "ATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGAC" "CTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGGG" "AGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGCA" "GGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTA" "CTCTGACACCGACGAATTTTACCCAGTTGCAGGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGC" "ACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGTT" "GAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAAGCAGAT" 
"ACGC"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } TEST_F(SpoaAlignmentTest, GlobalConsensusWithQualities) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kNW, 5, -4, -8, -8, -8, -8); initialize(); construct_partial_order_graph(true); auto consensus = graph->generate_consensus(); std::string valid_result = "ATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGAC" "CTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGGG" "AGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGCA" "GGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTA" "CTCTGACACCGACGAATTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGCA" "CAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGTTG" "AGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGATAC" "GC"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } TEST_F(SpoaAlignmentTest, GlobalAffineConsensusWithQualities) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kNW, 5, -4, -8, -6, -8, -6); initialize(); construct_partial_order_graph(true); auto consensus = graph->generate_consensus(); std::string valid_result = "ATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGAC" "CTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGGG" "AGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGCA" "GGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTA" "CTCTGACACCGACGAATTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGCA" "CAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGTTG" "AGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGATAC" "GC"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } TEST_F(SpoaAlignmentTest, GlobalConvexConsensusWithQualities) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kNW, 5, -4, -8, -6, -10, -2); initialize(); construct_partial_order_graph(true); auto consensus = graph->generate_consensus(); std::string valid_result = 
"ATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCGAC" "CTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAGGG" "AGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGGCA" "GGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCGTA" "CTCTGACACCGACGAATTTTACCCAGTTGCAGGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCGC" "ACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGTT" "GAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAAGCAGAT" "ACGC"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } TEST_F(SpoaAlignmentTest, SemiGlobalConsensus) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kOV, 5, -4, -8, -8, -8, -8); initialize(); construct_partial_order_graph(false); auto consensus = graph->generate_consensus(); std::string valid_result = "ACATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCG" "ACCTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAG" "GGAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGG" "CAGGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCG" "TACTCTGACACCGACGAATTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCG" "CACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGT" "TGAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGAT" "ACGCGTTTTACACGCGCAACCAAGGATTTCGG"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } TEST_F(SpoaAlignmentTest, SemiGlobalAffineConsensus) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kOV, 5, -4, -8, -6, -8, -6); initialize(); construct_partial_order_graph(false); auto consensus = graph->generate_consensus(); std::string valid_result = "GTATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCG" "ACCTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAG" "GGAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGG" "CAGGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCG" 
"TACTCTGACACCGACGAATTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCG" "CACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGT" "TGAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGAT" "ACGCGTTTTACACGCGCAACCAAGGATTTCGG"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } TEST_F(SpoaAlignmentTest, SemiGlobalConvexConsensus) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kOV, 5, -4, -8, -6, -10, -2); initialize(); construct_partial_order_graph(false); auto consensus = graph->generate_consensus(); std::string valid_result = "GTATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCG" "ACCTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAG" "GGAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGG" "CAGGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCG" "TACTCTGACACCGACGAATTTTACCCAGTTGCAGGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCC" "GCACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACG" "TTGAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAAGCAG" "ATACGC"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } TEST_F(SpoaAlignmentTest, SemiGlobalConsensusWithQualities) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kOV, 5, -4, -8, -8, -8, -8); initialize(); construct_partial_order_graph(true); auto consensus = graph->generate_consensus(); std::string valid_result = "ACATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCG" "ACCTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAG" "GGAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGG" "CAGGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCG" "TACTCTGACACCGACGAATTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCG" "CACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGT" "TGAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGAT" "ACGCGTTTTACACGCGCAACCAAGGATTTCGG"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } 
TEST_F(SpoaAlignmentTest, SemiGlobalAffineConsensusWithQualities) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kOV, 5, -4, -8, -6, -8, -6); initialize(); construct_partial_order_graph(true); auto consensus = graph->generate_consensus(); std::string valid_result = "ACATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCG" "ACCTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAG" "GGAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGG" "CAGGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCG" "TACTCTGACACCGACGAATTTTACCCAGTTGCAGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCCG" "CACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACGT" "TGAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAGCAGAT" "ACGCGTTTTACACGCGCAACCAAGGATTTCGG"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } TEST_F(SpoaAlignmentTest, SemiGlobalConvexConsensusWithQualities) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kOV, 5, -4, -8, -6, -10, -2); initialize(); construct_partial_order_graph(true); auto consensus = graph->generate_consensus(); std::string valid_result = "GTATGATGCGCTTTGTTGGCGCGGTGGCTTGATGCAGGGGCTAATCG" "ACCTCTGGCAACCACTTTTCCATGACAGGAGTTGAATATGGCATTCAGTAATCCCTTCGATGATCCGCAG" "GGAGCGTTTTACATATTGCGCAATGCGCAGGGGCAATTCAGTCTGTGGCCGCAACAATGCGTCTTACCGG" "CAGGCTGGGACATTGTGTGTCAGCCGCAGTCACAGGCGTCCTGCCAGCAGTGGCTGGAAGCCCACTGGCG" "TACTCTGACACCGACGAATTTTACCCAGTTGCAGGGAGGCACAATGAGCCAGCATTTACCTTTGGTCGCC" "GCACAGCCCGGCATCTGGATGGCAGAAAAACTGTCAGAATTACCCTCCGCCTGGAGCGTGGCGCATTACG" "TTGAGTTAACCGGAGAGGTTGATTCGCCATTACTGGCCCGCGCGGTGGTTGCCGGACTAGCGCAAAGCAG" "ATACGC"; EXPECT_TRUE(consensus.compare(valid_result) == 0); } TEST_F(SpoaAlignmentTest, LocalMSA) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kSW, 5, -4, -8, -8, -8, -8); initialize(); construct_partial_order_graph(false); std::vector msa; graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); 
for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } TEST_F(SpoaAlignmentTest, LocalAffineMSA) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kSW, 5, -4, -8, -6, -8, -6); initialize(); construct_partial_order_graph(false); std::vector msa; graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } TEST_F(SpoaAlignmentTest, LocalConvexMSA) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kSW, 5, -4, -8, -6, -10, -2); initialize(); construct_partial_order_graph(false); std::vector msa; graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } TEST_F(SpoaAlignmentTest, LocalMSAWithQualities) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kSW, 5, -4, -8, -8, -8, -8); initialize(); construct_partial_order_graph(true); std::vector msa; graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } TEST_F(SpoaAlignmentTest, LocalAffineMSAWithQualities) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kSW, 
5, -4, -8, -6, -8, -6); initialize(); construct_partial_order_graph(true); std::vector msa; graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } TEST_F(SpoaAlignmentTest, LocalConvexMSAWithQualities) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kSW, 5, -4, -8, -6, -10, -2); initialize(); construct_partial_order_graph(true); std::vector msa; graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } TEST_F(SpoaAlignmentTest, GlobalMSA) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kNW, 5, -4, -8, -8, -8, -8); initialize(); construct_partial_order_graph(false); std::vector msa; graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } TEST_F(SpoaAlignmentTest, GlobalAffineMSA) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kNW, 5, -4, -8, -6, -8, -6); initialize(); construct_partial_order_graph(false); std::vector msa; graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == 
sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } TEST_F(SpoaAlignmentTest, GlobalConvexMSA) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kNW, 5, -4, -8, -6, -10, -2); initialize(); construct_partial_order_graph(false); std::vector msa; graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } TEST_F(SpoaAlignmentTest, GlobalMSAWithQualities) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kNW, 5, -4, -8, -8, -8, -8); initialize(); construct_partial_order_graph(true); std::vector msa; graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } TEST_F(SpoaAlignmentTest, GlobalAffineMSAWithQualities) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kNW, 5, -4, -8, -6, -8, -6); initialize(); construct_partial_order_graph(true); std::vector msa; graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } TEST_F(SpoaAlignmentTest, GlobalConvexMSAWithQualities) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kNW, 5, -4, -8, -6, -10, -2); initialize(); construct_partial_order_graph(true); std::vector msa; 
graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } TEST_F(SpoaAlignmentTest, SemiGlobalMSA) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kOV, 5, -4, -8, -8, -8, -8); initialize(); construct_partial_order_graph(false); std::vector msa; graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } TEST_F(SpoaAlignmentTest, SemiGlobalAffineMSA) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kOV, 5, -4, -8, -6, -8, -6); initialize(); construct_partial_order_graph(false); std::vector msa; graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } TEST_F(SpoaAlignmentTest, SemiGlobalConvexMSA) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kOV, 5, -4, -8, -6, -10, -2); initialize(); construct_partial_order_graph(false); std::vector msa; graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } TEST_F(SpoaAlignmentTest, 
SemiGlobalMSAWithQualities) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kOV, 5, -4, -8, -8, -8, -8); initialize(); construct_partial_order_graph(true); std::vector msa; graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } TEST_F(SpoaAlignmentTest, SemiGlobalAffineMSAWithQualities) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kOV, 5, -4, -8, -6, -8, -6); initialize(); construct_partial_order_graph(true); std::vector msa; graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } TEST_F(SpoaAlignmentTest, SemiGlobalConvexMSAWithQualities) { SetUp(spoa_test_data_path + "sample.fastq", spoa::AlignmentType::kOV, 5, -4, -8, -6, -10, -2); initialize(); construct_partial_order_graph(true); std::vector msa; graph->generate_multiple_sequence_alignment(msa); EXPECT_TRUE(msa.size() == sequences.size()); for (std::uint32_t i = 0; i < msa.size(); ++i) { std::string tmp = ""; for (const auto& c: msa[i]) { if (c != '-') tmp += c; } EXPECT_TRUE(tmp.size() == sequences[i]->data().size()); EXPECT_TRUE(tmp.compare(sequences[i]->data()) == 0); } } spoa-3.0.1/test/spoa_test_config.h.in000066400000000000000000000002461351220300700175330ustar00rootroot00000000000000/*! 
* @file spoa_test_config.h.in
*
* @brief Spoa test configuration file
*/

// std::string is used by the definition below.
#include <string>

// Prefix the tests prepend to data file names (e.g. "sample.fastq").
// The @spoa_test_data_path@ token is a template placeholder; presumably it is
// substituted by CMake's configure_file() when the header is generated -
// confirm against the test section of CMakeLists.txt.
const std::string spoa_test_data_path = "@spoa_test_data_path@";
spoa-3.0.1/vendor/000077500000000000000000000000001351220300700137435ustar00rootroot00000000000000spoa-3.0.1/vendor/bioparser/000077500000000000000000000000001351220300700157315ustar00rootroot00000000000000spoa-3.0.1/vendor/googletest/000077500000000000000000000000001351220300700161175ustar00rootroot00000000000000