pax_global_header00006660000000000000000000000064140461472540014521gustar00rootroot0000000000000052 comment=84b2874e0ba5ffc9abaebe630433a430cd0f69f4 kraken2-2.1.2/000077500000000000000000000000001404614725400130605ustar00rootroot00000000000000kraken2-2.1.2/.gitignore000066400000000000000000000001401404614725400150430ustar00rootroot00000000000000*.o src/build_db src/classify src/estimate_capacity src/dump_table src/lookup_accession_numbers kraken2-2.1.2/.travis.yml000066400000000000000000000015631404614725400151760ustar00rootroot00000000000000version: ~> 1.0 language: cpp sudo: required dist: trusty matrix: include: - compiler: gcc addons: apt: sources: - ubuntu-toolchain-r-test packages: - g++-4.9 env: CXX=g++-4.9 - compiler: gcc addons: apt: sources: - ubuntu-toolchain-r-test packages: - g++-5 env: CXX=g++-5 - compiler: clang addons: apt: sources: - ubuntu-toolchain-r-test - llvm-toolchain-precise-3.6 packages: - clang-3.6 env: CXX=clang++-3.6 - compiler: clang addons: apt: sources: - ubuntu-toolchain-r-test - llvm-toolchain-precise-3.7 packages: - clang-3.7 env: CXX=clang++-3.7 os: - linux script: make -C src all kraken2-2.1.2/CHANGELOG.md000066400000000000000000000132321404614725400146720ustar00rootroot00000000000000# Changelog ## [2.1.2] - 2021-05-10 ### Changed - New retry behavior for kraken2-build --download-library --use-ftp ### Fixed - Bug that caused classification to not work when k-mer length was equal to minimizer length - Missing source file in CMakeLists.txt ## [2.1.1] - 2020-11-08 ### Fixed - Compilation error with GCC 10, omission of cstdint header - Removed --skip-maps from standard install due to addition of plasmids, which require acc/taxid maps ## [2.1.0] - 2020-10-13 ### Added - Small viral reference set and read simulator for future testing - Build options to aid testing - Integration of HLL to estimate distinct minimizer counts ### Changed - Build code now creates databases with deterministic MD5 sums by default; --fast-build option to kraken2-build introduced to access old behavior - Added plasmid library to standard installation set - Updated SILVA to release 138.1 ### Fixed - Modified build code to prevent insertion of minimizers with ambiguous bases - Bug where hit list output in quick mode used internal taxid (not external) - No more attempts to download "na" paths from FTP site - Runaway memory usage bug with unpaired classification (thanks to D. Cameron) - Modified estimation code to better handle small reference libraries ## [2.0.9] - 2020-04-07 (beta) ### Added - Expose --load-factor setting to kraken2-build - New --minimum-hit-groups option to kraken2 ### Changed - Require 2 hit groups (set of overlapping k-mers w/ same minimizer) to make classification by default - Allow build options to pass through to subsequent invocations (e.g., k-mer length for 16S DBs) - Removed env options for library downloads (no longer available from same NCBI location) - Updated SILVA to release 138 ### Fixed - Removed mention of --fastq-input from Manual - Made PE read identifier suffix trimming more restrictive (only on /1 and /2) - Bug where some reads would be classified with taxid 0 - Bug that didn't allow kraken2-inspect to work with large databases ## [2.0.8] - 2019-04-25 (beta) ### Added - FTP downloading option for taxonomy/libraries (--use-ftp for kraken2-build) - Option to skip downloading taxonomy maps ### Changed - Added lookup table to speed up parsing in MinimizerScanner class - Default parameters for minimizer lengths and spaces changed (spaces=7 for nucleotide search, length=12 for translated search) ### Fixed - Linked space expansion value for proteins to constant used by MinimizerScanner - Reporting of taxids in classified-out sequence files - Confidence scoring bug associated with failure to leave some sequences unclassified - Reverse complement shifting bug, code made backwards-compatible with existing databases (newly created DBs will have fix) - NCBI taxonomy download error due to removal of EST/GSS files ## [2.0.7] - 2018-08-11 (beta) ### Added - Disk usage info to kraken2-build --clean - Memory allocation error message for hash table - Option for --max-db-size and hash downsampling - Multithreading to kraken2-inspect ### Changed - Move to /usr/bin/env for perl scripts - Add DB loading message to keep people from killing processes early - Add flag files for resuming download of nucleotide accession map data - Converted lookup_accession_numbers script into C++ program w/ memory mapping - Clarified in manual that one or more libraries allowed for custom DBs - Silenced progress messages in C++ programs for non-TTY stderr - Taxonomy downloads switched to rsync from wget (ftp) - Removed '%' from reports ### Fixed - Allow d/l of protozoa library w/ kraken2-build script - Filenames for SILVA database taxonomy info - Typo in manual for output format example - Corrected default space count in manual - Removed obvious race condition in --add-to-library functionality - Corrected behavior of --classified-out and --unclassified-out (no longer forcing .fq/.fa file extensions, respecting '#' in paired mode) - Usage message in kraken2-inspect - Taxonomy creation for 16S databases ## [2.0.6] - 2018-06-13 (beta) ### Changed - New DB summary info printed out w/ inspect script + --skip-counts option ### Fixed - Now stripping carriage returns and other trailing whitespace from sequence data - Treating l-mers immediately following ambiguous characters as ambiguous until a full k-mer is processed - Bug in expansion of spaced seed masks that left spaces at end ## [2.0.5] - 2018-05-21 (beta) ### Added - New kraken2-inspect script to report minimizer counts per taxon ### Changed - Kraken 2X build now adds terminators to all reference sequences ## [2.0.4] - 2018-05-06 (beta) ### Fixed - Improved portability to older g++ by removing initialization of variable-length string. ## [2.0.3] - 2018-02-12 (alpha) ### Added - Reporting options to kraken2 script (like Kraken 1's kraken-report and kraken-mpa-report) ### Changed - Made loading to RAM default option, added --memory-mapping option to kraken2 ## [2.0.2] - 2018-02-04 (alpha) ### Added - Low base quality masking option ### Changed - Moved low-complexity masking to library download/addition, out of build process - Made no masking default for human genome in standard installation ## [2.0.1] - 2018-01-01 (alpha) ### Added - Low-complexity sequence masking as a default - UniVec/UniVec_Core databases to supported downloads - UniVec_Core & human in standard Kraken 2 DB - 16S DB support (Greengenes, Silva, RDP) - --use-names flag for kraken2 script - Priority queue to ensure classifier output order matches input order when multi-threading - Changelog ### Changed - Reduced amino acid alphabet (requires rebuild of old protein DBs) - Operating manual ### Fixed - kraken2 now allows compression & paired processing at same time ## [2.0.0] - 2017-12-04 (alpha, initial release) kraken2-2.1.2/CMakeLists.txt000066400000000000000000000011521404614725400156170ustar00rootroot00000000000000cmake_minimum_required(VERSION 2.8) project(kraken2) if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) add_compile_options(-Wall) add_definitions(-DLINEAR_PROBING) set(CMAKE_CXX_FLAGS_DEBUG "-g") set(CMAKE_CXX_FLAGS_RELEASE "-O3") include(FindOpenMP) if(OPENMP_FOUND) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") else(OPENMP_FOUND) message("ERROR: OpenMP could not be found.") endif(OPENMP_FOUND) add_subdirectory(src) kraken2-2.1.2/LICENSE000066400000000000000000000020741404614725400140700ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2017-2021 Derrick Wood Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. kraken2-2.1.2/README.md000066400000000000000000000043221404614725400143400ustar00rootroot00000000000000[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/kraken2/README.html) [![European Galaxy server](https://img.shields.io/badge/usegalaxy-.eu-brightgreen?logo=)](https://usegalaxy.eu/root?tool_id=kraken2) kraken2 ======= The second version of the Kraken taxonomic sequence classification system Please refer to the Operating Manual (in docs/MANUAL.html) for details on how to use Kraken 2. kraken2-2.1.2/data/000077500000000000000000000000001404614725400137715ustar00rootroot00000000000000kraken2-2.1.2/data/COVID_19.fa000066400000000000000000000732731404614725400154720ustar00rootroot00000000000000>kraken:taxid|2697049|NC_045512.2 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAA AATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGG ACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTT CGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGC CTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACAT CTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAA ACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTC GTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAG AACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGA TCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACG GAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTA GCACGTGCTGGTAAAGCTTCATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCG TGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAAT TGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAA CCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATG CAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCAGACGGGCGATTTTGTTAAAG CCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTT GTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGG CTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACA AGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGT CTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGA GATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTATAAAGCAT TCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAA CAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCT TGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGA GACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTT GTTCAGTTGACTTCGCAGTGGCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGA AGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAA TTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTT TTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCA CTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCCTCTAAAAGCCCCAAAAGAAA TTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCA TTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGA AATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTG CACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTT GATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGC CTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATTGATTTAGATG AGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTCTAC CCTCCAGATGAGGATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGA AGATGATTACCAAGGTAAACCTTTGGAATTTGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATT GGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATT GTTGAGGTTCAACCTCAATTAGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTT AAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTA ATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTACTAACAATGCCATGCAAGTT GAATCTGATGATTACATAGCTACTAATGGACCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAA ACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAACTTCTTAAGAGTGCTTATGAAAATTTTA ATCAGCACGAAGTTCTACTTGCACCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTGT GTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGA AATGAAGAGTGAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGCCATTTATAACTGAAAGTA AACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAA ACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAATCTTCATCCAGATTCTGCCACTCTTGTTAG TGACATTGACATCACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTTTTAACTGCTG TGGTTATACCTACTAAAAAGGCTGGTGGCACTACTGAAATGCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTAT ATAACCACTTACCCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGC CTTTTACATTCTACCATCTATTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGC TTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAAAGCCATAGTTTCAACTATACAGCGTAAA TATAAGGGTATTAAAATACAAGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACAACTGTAGC GTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGTTACAATGCCACTTGGCTATGTAACACATGGCTTAAATT TGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTTCACCTGATGCTGTTACAGCG TATAATGGTTATCTTACTTCTTCTTCTAAAACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAA AGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAGAGAGGTGATAAAAGTGTATATTACACTA GTAATCCTACCACATTCCACCTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTTGAGAGAAGTG AGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACA ACAGTTTGGTCCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCACATGAAGGTAAAACATTTT ATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGG TACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAATGGTTTAACTTCTATTAAATGGGCAGATAA CAACTGTTATCTTGCCACTGCATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTACAAGATGCTT ATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGCACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTA GGTGATGTTAGAGAAACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTG TAAAACTTGTGGACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAAT TTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATATCTAGTACAACAGGAGTCACCTTTTGTT ATGATGTCAGCACCACCTGCTCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGTAATTACCA GTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAAT ACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAACCAGTTACTTATAAATTGGAT GGTGTTGTTTGTACAGAAATTGACCCTAAGTTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAAT TGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTATGTGATAATATCAAATTTGCTG ATGATTTAAACCAGTTAACTGGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGACTTAAATGGT GATGTGGTGGCTATTGATTATAAACACTACACACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTG GCATGTTAACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTGTCTTTGGAGCACAAAACCAG TTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAA CCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGT AGGAGACATTATACTTAAACCAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATGGCTGCTT ATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGAATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCAT GGTTTAGCTGCTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTAC AACTACTAACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAAT TGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACTACTATAGCAAAGAATACTGTTAAGAGT GTCGGTAAATTTTGTCTAGAGGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATTATAATTTG GTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTACTCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCA TGCCTTCTTACTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCT ATACCTTGTAGTGTTTGTCTTAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTC ATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAGGTTTTTCT ATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTCTTGGCTTATG TGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTA TGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTGTTACAAACGTAATAGAGCAA CAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGC AAACTACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGA CTTGTCACTACAGTTTAAAAGACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGTGAAGAATG GTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGAC AACCTGAGAGCTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAATGTGAAGAATC ATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGTCTG ATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTTAATACGTTTTCATCAACTTTTAACGTACCA ATGGAAAAACTCAAAACACTAGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTAC TTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGATGTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCAC ATCAATCTGACATAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACAAAGTTGAAAACATGACACCC CGTGACCTTGGTGCTTGTATTGACTGTAGTGCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGAT ATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTAGTGCTGCTAAAAAGAATAACTTAC CTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGGTGGTAAAATT GTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTGTGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACC TGTTCATGTCATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTATTGATGGTGGTGTCACTCGTG ACATAGCATCTACAGATACTTGTTTTGCTAACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTAT ACTAATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCAC GATATTACGCACAACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTGTTACACAC CATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCT TCTGGTAAGCCAGTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTATGAAAGTTTACGCCCTGACAC ACGTTATGTGCTCATGGATGGCTCTATTATTCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTT TTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGTGTTTGTGTATCTACTAGTGGTAGATGGGTA CTTAACAATGATTATTACAGATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTAATATGTTTAC ACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTATTGTAGCTATCGTAGTAACAT GCCTTGCCTACTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTGCCTTTAATACTTTACTATTC CTTATGTCATTCACTGTACTCTGTTTAACACCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTT GACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGTTATGTTCACACCTTTAGTACCTTTCT GGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAAGAGACGTGTA GTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTGCGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAA GTTGCGTAGTGATGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAATAAGTACAAGTATTTTAGTG GAGCAATGGATACAACTAGCTACAGAGAAGCTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGT TCTGATGTTCTTTACCAACCACCACAAACCTCTATCACCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCC ATCTGGTAAAGTTGAGGGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTTGATGACGTAG TTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGACATGCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCT AATCATAATTTCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATGCAAAATTGTGTACTTAAGCT TAAGGTTGATACAGCCAATCCTAAGACACCTAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAG CTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGGCCCAATTTCACTATTAAGGGTTCATTCCTTAAT GGTTCATGTGGTAGTGTTGGTTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATTACCAAC TGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTATGGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTA CGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAGACAGGTGGTTTCTCAATCGA TTTACCACAACTCTTAATGACTTTAACCTTGTGGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACAT ACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCTTCATTAAAAGAATTACTGCAAAATGGTA TGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACAATGCTCAGGT GTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTT AGTTTTAGTCCAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTACCTTTTGCTATGGGTATTA TTGCTATGTCTGCTTTTGCAATGATGTTTGTCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCC ACTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCGTATTATGACATGGTTGGATATGGTTGATAC TAGTTTGTCTGGTTTTAAGCTAAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCAAGAA CTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAAT GCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCAT GTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTA TAATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGTTTACTCAACCGCTACTTTAGA CTGACTCTTGGTGTTTATGATTACTTAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCAA GAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGTGGCAAACCTTGTATCAAAGTAGCCACTGTAC AGTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACTCAGAGTAGAATCATCATCT AAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATTCTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGT TTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTGAAGAAATGCTGGACAACAGGG CAACCTTACAAGCTATAGCCTCAGAGTTTAGTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGAG CAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAAGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGA CCGTGATGCAGCCATGCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACAGGCTAGATCTG AGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAATGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTC AACAACATTATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTACAACAGCAGCCAAACTAATGGT TGTCATACCAGACTATAACACATATAAAAATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAATCC AACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAATTAGTATGGACAATTCACCTAATTTAGCATGGCCT CTTATTGTAACAGCTTTAAGGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGAT GTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTA GGTTTGTACTTGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAGAGTGATGGAACTGGTACTATC TATACAGAACTGGAACCACCTTGTAGGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTAA AGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCCACAGTACGTCTACAAGCTGGTAATGCAACAG AAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTACAAAGATTATCTAGCT AGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTGTGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACC GGAAGCCAATATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTTGCCACATAGATCATCCAAATC CTAAAGGATTTTGTGACTTAAAAGGTAAGTATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACTT AAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCA GTCAGCTGATGCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGCACAGGCACTAG TACTGATGTCGTATACAGGGCTTTTGACATCTACAATGATAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTT GTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTTGTAGTTAAGAGACACACTTTCTCTAACTAC CAACATGAAGAAACAATTTATAATTTACTTAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAGA CGGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATACACAATGGCAGACCTCGTCTATGCTTTAAGGCATT TTGATGAAGGTAATTGTGACACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATTTCAATAAAAAG GACTGGTATGATTTTGTAGAAAACCCAGATATATTACGCGTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTT AAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACATTAGATAATCAAGATCTCAATG GTAACTGGTATGATTTCGGTGATTTCATACAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATTG TTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATGTTGACACTGACTTAACAAAGCCTTACATTAA GTGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTTAAATATTGGGATCAGACAT ACCACCCAAATTGTGTTAACTGTTTGGATGACAGATGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTG TTCCCACCTACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTTGTAGTTTCAACTGGATACCA CTTCAGAGAGCTAGGTGTTGTACATAATCAGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTGT ATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACTAGATAAACGCACTACGTGCTTTTCAGTAGCTGCA CTTACTAACAATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTTGCTGTGTCTAAGGG TTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTCTTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATG ACTACTATCGTTATAATCTACCAACAATGTGTGATATCAGACAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTT GATTGTTACGATGGTGGCTGTATTAATGCTAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTAA TAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAGGATCAAGATGCACTTTTCGCATATACAAAACGTA ATGTCATCCCTACTATAACTCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTGGTGTC TCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGT AGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATAGTGATGTAGAAAACCCTCACC TTATGGGTTGGGATTATCCTAAATGTGATAGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCGC AAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGT CATGTGTGGCGGTTCACTATATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACTGCTTATGCTAATAGTGTTT TTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTC CGCAATTTACAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGACTTTGTGAATGAGTTTTACGC ATATTTGCGTAAACATTTCTCAATGATGATACTCTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTATGCATCTCAAG GTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATCAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGG ACTGAGACTGACCTTACTAAAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAGGGTGATGATTATGT GTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCCGGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACAC TTATGATTGAACGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTC TTTCATTTGTACTTACAATACATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGCT TACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCTATGTACACACCGCATACAGTCTTACAGGCTG TTGGGGCTTGTGTTCTTTGCAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCTTATGTTGTAAA TGCTGTTACGACCATGTCATATCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTG TGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAATCACATAAACCACCCATTAGTTTTC CATTGTGTGCTAATGGACAAGTTTTTGGTTTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCA ATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGCAGC AGAAACGCTCAAAGCTACTGAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGTGCTGTCTGACAGAG AATTACATCTTTCATGGGAAGTTGGTAAACCTAGACCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACT AAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCTGTTGTTTACCGAGGTACAAC AACTTACAAATTAAATGTTGGTGATTATTTTGTGCTGACATCACATACAGTAATGCCATTAAGTGCACCTACACTAGTGC CACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAATATCTCAGATGAGTTTTCTAGCAATGTTGCAAAT TATCAAAAGGTTGGTATGCAAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCATTTTGCTATTGGCCT AGCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGCTTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCAT TAAAATATTTGCCTATAGATAAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTG AATTCAACATTAGAACAGTATGTCTTTTGTACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTGATGA AATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTACGTGCTAAGCACTATGTGTACATTGGCGACC CTGCTCAATTACCTGCACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTT ATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTT GGTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAATGTTTTATAAGGGTGTTATCACGC ATGATGTTTCATCTGCAATTAACAGGCCACAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAA GCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGATTC ATCACAGGGCTCAGAATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAATGTAAACAGATTTA ATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCATAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACA AGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTCTTTAAAGATTGTAGTAAGGT AATCACTGGGTTACATCCTACACAGGCACCTACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGTTTATGTGTTG ACATACCTGGCATACCTAAGGACATGACCTATAGAAGACTCATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAAT GGTTACCCTAACATGTTTATCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGATGTCGAGGGGTG TCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTTACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTAC CTACAGGTTATGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAA CACCTCATACCACTTATGTACAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACACACT TAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTTGAGTTGACATCTATGAAGTATTTTGTGAAAA TAGGACCTGAGCGCACCTGTTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTTATGCCTGTTGG CATCATTCTATTGGATTTGATTACGTCTATAATCCGTTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACA AAGCAACCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATGCAATCATGACTAGGTGTCTAG CTGTCCACGAGTGCTTTGTTAAGCGTGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGCG GCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAGACAAATTCCCAGTTCTTCACGACATTGGTAA CCCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTGTAGTGACAAAGCTT ATAAAATAGAAGAATTATTCTATTCTTATGCCACACATTCTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGC AATGTCGATAGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATCTAACCTTAACTTGCCTGGTTG TGATGGTGGCAGTTTGTATGTAAATAAACATGCATTCCACACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAAC AATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCTCATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCA CTAAAGTCTGCTACGTGTATAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAGTACAGATTGTA TCTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAGCTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGA ACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGACACTTTGATGGACAACAGGGT GAAGTACCAGTTTCTATCATTAATAACACTGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAAAAC AACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATTAAACCAGTACCAGAGGTGAAAATACTCAATA ATTTGGGTGTGGACATTGCTGCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACATATATCTACTATTGGT GTTTGTTCTATGACTGACATAGCCAAGAAACCAACTGAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGT TGATGGTCAAGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAAGGTAGTGTTAAAGGTTTACAACCAT CTGTAGGTCCCAAACAAGCTAGTCTTAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAG AAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGAGTAGAAATTTACAAGAATTTAAACCCAGGAG TCAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTAGAAGGCTATGCCTTCGAAC ATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTGGTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAA TCACCTTTTGAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTTCATAACAGATGCGCAAACAGGTTC ATCTAAGTGTGTGTGTTCTGTTATTGATTTATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTAG TTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATTTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACA TTTTACCCAAAATTACAATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATGCAAAGAATGCT ATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCAACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATA CTCAACTGTGTCAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGAGTTATACATTTTGGTGCTGGTTCT GATAAAGGAGTTGCACCAGGTACAGCTGTTTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATCTTAA TGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGCAACTGTACATACAGCTAATAAATGGGATCTCATTA TTAGTGATATGTACGACCCTAAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCACTTACATTTGT GGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTA TAAGCTCATGGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCATCATCTGAAGCATTTTTAATTG GATGTAATTATCTTGGCAAACCACGCGAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATACA AATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTCCCCTTAAATTAAGGGGTACTGCTGTTATGTC TTTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATAATTAGAGAAAACAACAGAG TTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGAACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAG TCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTG ACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCAT GCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGC TTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTG TTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGGTGTTTATTACCAC AAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCA GCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTT ATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTG GTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGA TTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATA ATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTC ACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTAC AAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCA ACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAA TTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGG GCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACA ATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGA GATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACA ATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATG CACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTA ACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTAC TGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACAC CAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCA GATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGC TGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATT CTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCT TACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAA GACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTT GTACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAA CAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAG CAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATT GCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACA GATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGC ATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACC AAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGA AAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAAT TTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCA GACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCT ACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCC TCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTG CCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAA AGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGT CAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATA CATCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTC AATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCC ATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTT GCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAA GGAGTCAAATTACATTACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTCACAATTGGAACTGTAACTTTGAAG CAAGGTGAAATCAAGGATGCTACTCCTTCAGATTTTGTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTT CGGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTTCAGAGCGCTTCCAAAATCATAACCCTCAAAAAGAGATGGC AACTAGCACTCTCCAAGGGTGTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACACCTTTTGCTC GTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTATGCTTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAAT AATAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCCAACTATTTTCTTTGCTGGC ATACTAATTGTTACGACTATTGTATACCTTACAATAGTGTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAACA AGTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATGGGAATCTGGAGTAAAAGACTGTGTTGTATT ACACAGTTACTTCACTTCAGACTATTACCAGCTGTACTCAACTCAATTGAGTACAGACACTGGTGTTGAACATGTTACCT TCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTT AATCCAGTAATGGAACCAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGA ACTTATGTACTCATTCGTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTAT TCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGTGAGTCTTGTA AAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGATCTTCTGGTCTAAACGAACTA AATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGCCATGGCAGATTCCAACGGTACTATTACCGTTGAAGAGCT TAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATG CCAACAGGAATAGGTTTTTGTATATAATTAAGTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAGCTTGTTTTGTG CTTGCTGCTGTTTACAGAATAAATTGGATCACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCT CAGCTACTTCATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATTCTTC TCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGT GGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAAGAAATCACTGTTGCTACATC ACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACA GGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACA GATGTTTCATCTCGTTGACTTTCAGGTTACTATAGCAGAGATATTACTAATTATTATGAGGACTTTTAAAGTTTCCATTT GGAATCTTGATTACATCATAAACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGAT GAAGAGCAACCAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGA GCTTTATCACTACCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGGCA ATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCACTCAATTTGCTTTTGCTTGTCCTGAC GGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACTGTTCATCAGACAAGAGGAAGTTCAAGAACT TTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTTATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGAT TGAACTTTCATTAATTGACTTCTATTTGTGCTTTTTAGCCTTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTT GGTTCTCACTTGAACTGCAAGATCATAATGAAACTTGTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATC ATCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCC GTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAATTGTGCGTGG ATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAAT TGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGT TGTTTTAGATTTCATCTAAACGAACAAACTAAAATGTCTGATAATGGACCCCAAAATCAGCGAAATGCACCCCGCATTAC GTTTGGTGGACCCTCAGATTCAACTGGCAGTAACCAGAATGGAGAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCC AAGGTTTACCCAATAATACTGCGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGA CAAGGCGTTCCAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGG TGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCTGGACTTCCCT ATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGC AATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAG CAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTA GGGGAACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAG CTTGAGAGCAAAATGTCTGGTAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAA GAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCC AAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCC AGCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGGTGC CATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGCTGAATAAGCATATTGACGCATACAAAACAT TCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAG CAAACTGTGACTCTTCTTCCTGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCAGTGCTGACTC AACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATA GTCTACTCTTGTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCT TTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTACGATCGAGTGT ACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAATTTTAGTAGTGCTATCCCCAT GTGATTTTAATAGCTTCTTAGGAGAATGACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA kraken2-2.1.2/data/FluA_H1N1.fa000066400000000000000000000344521404614725400156670ustar00rootroot00000000000000>kraken:taxid|211044|NC_002023.1 Influenza A virus (A/Puerto Rico/8/1934(H1N1)) segment 1, complete sequence AGCGAAAGCAGGTCAATTATATTCAATATGGAAAGAATAAAAGAACTAAGAAATCTAATGTCGCAGTCTCGCACCCGCGA GATACTCACAAAAACCACCGTGGACCATATGGCCATAATCAAGAAGTACACATCAGGAAGACAGGAGAAGAACCCAGCAC TTAGGATGAAATGGATGATGGCAATGAAATATCCAATTACAGCAGACAAGAGGATAACGGAAATGATTCCTGAGAGAAAT GAGCAAGGACAAACTTTATGGAGTAAAATGAATGATGCCGGATCAGACCGAGTGATGGTATCACCTCTGGCTGTGACATG GTGGAATAGGAATGGACCAATGACAAATACAGTTCATTATCCAAAAATCTACAAAACTTATTTTGAAAGAGTCGAAAGGC TAAAGCATGGAACCTTTGGCCCTGTCCATTTTAGAAACCAAGTCAAAATACGTCGGAGAGTTGACATAAATCCTGGTCAT GCAGATCTCAGTGCCAAGGAGGCACAGGATGTAATCATGGAAGTTGTTTTCCCTAACGAAGTGGGAGCCAGGATACTAAC ATCGGAATCGCAACTAACGATAACCAAAGAGAAGAAAGAAGAACTCCAGGATTGCAAAATTTCTCCTTTGATGGTTGCAT ACATGTTGGAGAGAGAACTGGTCCGCAAAACGAGATTCCTCCCAGTGGCTGGTGGAACAAGCAGTGTGTACATTGAAGTG TTGCATTTGACTCAAGGAACATGCTGGGAACAGATGTATACTCCAGGAGGGGAAGTGAAGAATGATGATGTTGATCAAAG CTTGATTATTGCTGCTAGGAACATAGTGAGAAGAGCTGCAGTATCAGCAGACCCACTAGCATCTTTATTGGAGATGTGCC ACAGCACACAGATTGGTGGAATTAGGATGGTAGACATCCTTAAGCAGAACCCAACAGAAGAGCAAGCCGTGGGTATATGC AAGGCTGCAATGGGACTGAGAATTAGCTCATCCTTCAGTTTTGGTGGATTCACATTTAAGAGAACAAGCGGATCATCAGT CAAGAGAGAGGAAGAGGTGCTTACGGGCAATCTTCAAACATTGAAGATAAGAGTGCATGAGGGATATGAAGAGTTCACAA TGGTTGGGAGAAGAGCAACAGCCATACTCAGAAAAGCAACCAGGAGATTGATTCAGCTGATAGTGAGTGGGAGAGACGAA CAGTCGATTGCCGAAGCAATAATTGTGGCCATGGTATTTTCACAAGAGGATTGTATGATAAAAGCAGTTAGAGGTGATCT GAATTTCGTCAATAGGGCGAATCAGCGACTGAATCCTATGCATCAACTTTTAAGACATTTTCAGAAGGATGCGAAAGTGC TTTTTCAAAATTGGGGAGTTGAACCTATCGACAATGTGATGGGAATGATTGGGATATTGCCCGACATGACTCCAAGCATC GAGATGTCAATGAGAGGAGTGAGAATCAGCAAAATGGGTGTAGATGAGTACTCCAGCACGGAGAGGGTAGTGGTGAGCAT TGACCGGTTCTTGAGAGTCCGGGACCAACGAGGAAATGTACTACTGTCTCCCGAGGAGGTCAGTGAAACACAGGGAACAG AGAAACTGACAATAACTTACTCATCGTCAATGATGTGGGAGATTAATGGTCCTGAATCAGTGTTGGTCAATACCTATCAA TGGATCATCAGAAACTGGGAAACTGTTAAAATTCAGTGGTCCCAGAACCCTACAATGCTATACAATAAAATGGAATTTGA ACCATTTCAGTCTTTAGTACCTAAGGCCATTAGAGGCCAATACAGTGGGTTTGTGAGAACTCTGTTCCAACAAATGAGGG ATGTGCTTGGGACATTTGATACCGCACAGATAATAAAACTTCTTCCCTTCGCAGCCGCTCCACCAAAGCAAAGTAGAATG CAGTTCTCCTCATTTACTGTGAATGTGAGGGGATCAGGAATGAGAATACTTGTAAGGGGCAATTCTCCTGTATTCAACTA CAACAAGGCCACGAAGAGACTCACAGTTCTCGGAAAGGATGCTGGCACTTTAACCGAAGACCCAGATGAAGGCACAGCTG GAGTGGAGTCCGCTGTTCTGAGGGGATTCCTCATTCTGGGCAAAGAAGACAGGAGATATGGGCCAGCATTAAGCATCAAT GAACTGAGCAACCTTGCGAAAGGAGAGAAGGCTAATGTGCTAATTGGGCAAGGAGACGTGGTGTTGGTAATGAAACGAAA ACGGGACTCTAGCATACTTACTGACAGCCAGACAGCGACCAAAAGAATTCGGATGGCCATCAATTAGTGTCGAATAGTTT AAAAACGACCTTGTTTCTACT >kraken:taxid|211044|NC_002021.1 Influenza A virus (A/Puerto Rico/8/1934(H1N1)) segment 2, complete sequence AGCGAAAGCAGGCAAACCATTTGAATGGATGTCAATCCGACCTTACTTTTCTTAAAAGTGCCAGCACAAAATGCTATAAG CACAACTTTCCCTTATACCGGAGACCCTCCTTACAGCCATGGGACAGGAACAGGATACACCATGGATACTGTCAACAGGA CACATCAGTACTCAGAAAAGGCAAGATGGACAACAAACACCGAAACTGGAGCACCGCAACTCAACCCGATTGATGGGCCA CTGCCAGAAGACAATGAACCAAGTGGTTATGCCCAAACAGATTGTGTATTGGAAGCAATGGCTTTCCTTGAGGAATCCCA TCCTGGTATTTTTGAAAACTCGTGTATTGAAACGATGGAGGTTGTTCAGCAAACACGAGTAGACAAGCTGACACAAGGCC GACAGACCTATGACTGGACTTTAAATAGAAACCAGCCTGCTGCAACAGCATTGGCCAACACAATAGAAGTGTTCAGATCA AATGGCCTCACGGCCAATGAGTCTGGAAGGCTCATAGACTTCCTTAAGGATGTAATGGAGTCAATGAAAAAAGAAGAAAT GGGGATCACAACTCATTTTCAGAGAAAGAGACGGGTGAGAGACAATATGACTAAGAAAATGATAACACAGAGAACAATAG GTAAAAGGAAACAGAGATTGAACAAAAGGAGTTATCTAATTAGAGCATTGACCCTGAACACAATGACCAAAGATGCTGAG AGAGGGAAGCTAAAACGGAGAGCAATTGCAACCCCAGGGATGCAAATAAGGGGGTTTGTATACTTTGTTGAGACACTGGC AAGGAGTATATGTGAGAAACTTGAACAATCAGGGTTGCCAGTTGGAGGCAATGAGAAGAAAGCAAAGTTGGCAAATGTTG TAAGGAAGATGATGACCAATTCTCAGGACACCGAACTTTCTTTGACCATCACTGGAGATAACACCAAATGGAACGAAAAT CAGAATCCTCGGATGTTTTTGGCCATGATCACATATATGACCAGAAATCAGCCCGAATGGTTCAGAAATGTTCTAAGTAT TGCTCCAATAATGTTCTCAAACAAAATGGCGAGACTGGGAAAAGGGTATATGTTTGAGAGCAAGAGTATGAAACTTAGAA CTCAAATACCTGCAGAAATGCTAGCAAGCATTGATTTGAAATATTTCAATGATTCAACAAGAAAGAAGATTGAAAAAATC CGACCGCTCTTAATAGAGGGGACTGCATCATTGAGCCCTGGAATGATGATGGGCATGTTCAATATGTTAAGCACTGTATT AGGCGTCTCCATCCTGAATCTTGGACAAAAGAGATACACCAAGACTACTTACTGGTGGGATGGTCTTCAATCCTCTGACG ATTTTGCTCTGATTGTGAATGCACCCAATCATGAAGGGATTCAAGCCGGAGTCGACAGGTTTTATCGAACCTGTAAGCTA CATGGAATCAATATGAGCAAGAAAAAGTCTTACATAAACAGAACAGGTACATTTGAATTCACAAGTTTTTTCTATCGTTA TGGGTTTGTTGCCAATTTCAGCATGGAGCTTCCCAGTTTTGGTGTGTCTGGGAGCAACGAGTCAGCGGACATGAGTATTG GAGTTACTGTCATCAAAAACAATATGATAAACAATGATCTTGGTCCAGCAACAGCTCAAATGGCCCTTCAGTTGTTCATC AAAGATTACAGGTACACGTACCGATGCCATAGAGGTGACACACAAATACAAACCCGAAGATCATTTGAAATAAAGAAACT GTGGGAGCAAACCCGTTCCAAAGCTGGACTGCTGGTCTCCGACGGAGGCCCAAATTTATACAACATTAGAAATCTCCACA TTCCTGAAGTCTGCCTAAAATGGGAATTGATGGATGAGGATTACCAGGGGCGTTTATGCAACCCACTGAACCCATTTGTC AGCCATAAAGAAATTGAATCAATGAACAATGCAGTGATGATGCCAGCACATGGTCCAGCCAAAAACATGGAGTATGATGC TGTTGCAACAACACACTCCTGGATCCCCAAAAGAAATCGATCCATCTTGAATACAAGTCAAAGAGGAGTACTTGAAGATG AACAAATGTACCAAAGGTGCTGCAATTTATTTGAAAAATTCTTCCCCAGCAGTTCATACAGAAGACCAGTCGGGATATCC AGTATGGTGGAGGCTATGGTTTCCAGAGCCCGAATTGATGCACGGATTGATTTCGAATCTGGAAGGATAAAGAAAGAAGA GTTCACTGAGATCATGAAGATCTGTTCCACCATTGAAGAGCTCAGACGGCAAAAATAGTGAATTTAGCTTGTCCTTCATG AAAAAATGCCTTGTTCCTACT >kraken:taxid|211044|NC_002022.1 Influenza A virus (A/Puerto Rico/8/1934(H1N1)) segment 3, complete sequence AGCGAAAGCAGGTACTGATCCAAAATGGAAGATTTTGTGCGACAATGCTTCAATCCGATGATTGTCGAGCTTGCGGAAAA AACAATGAAAGAGTATGGGGAGGACCTGAAAATCGAAACAAACAAATTTGCAGCAATATGCACTCACTTGGAAGTATGCT TCATGTATTCAGATTTCCACTTCATCAATGAGCAAGGCGAGTCAATAATCGTAGAACTTGGTGATCCTAATGCACTTTTG AAGCACAGATTTGAAATAATCGAGGGAAGAGATCGCACAATGGCCTGGACAGTAGTAAACAGTATTTGCAACACTACAGG GGCTGAGAAACCAAAGTTTCTACCAGATTTGTATGATTACAAGGAAAATAGATTCATCGAAATTGGAGTAACAAGGAGAG AAGTTCACATATACTATCTGGAAAAGGCCAATAAAATTAAATCTGAGAAAACACACATCCACATTTTCTCGTTCACTGGG GAAGAAATGGCCACAAAGGCCGACTACACTCTCGATGAAGAAAGCAGGGCTAGGATCAAAACCAGGCTATTCACCATAAG ACAAGAAATGGCCAGCAGAGGCCTCTGGGATTCCTTTCGTCAGTCCGAGAGAGGAGAAGAGACAATTGAAGAAAGGTTTG AAATCACAGGAACAATGCGCAAGCTTGCCGACCAAAGTCTCCCGCCGAACTTCTCCAGCCTTGAAAATTTTAGAGCCTAT GTGGATGGATTCGAACCGAACGGCTACATTGAGGGCAAGCTGTCTCAAATGTCCAAAGAAGTAAATGCTAGAATTGAACC TTTTTTGAAAACAACACCACGACCACTTAGACTTCCGAATGGGCCTCCCTGTTCTCAGCGGTCCAAATTCCTGCTGATGG ATGCCTTAAAATTAAGCATTGAGGACCCAAGTCATGAAGGAGAGGGAATACCGCTATATGATGCAATCAAATGCATGAGA ACATTCTTTGGATGGAAGGAACCCAATGTTGTTAAACCACACGAAAAGGGAATAAATCCAAATTATCTTCTGTCATGGAA GCAAGTACTGGCAGAACTGCAGGACATTGAGAATGAGGAGAAAATTCCAAAGACTAAAAATATGAAAAAAACAAGTCAGC TAAAGTGGGCACTTGGTGAGAACATGGCACCAGAAAAGGTAGACTTTGACGACTGTAAAGATGTAGGTGATTTGAAGCAA TATGATAGTGATGAACCAGAATTGAGGTCGCTTGCAAGTTGGATTCAGAATGAGTTCAACAAGGCATGCGAACTGACAGA TTCAAGCTGGATAGAGCTTGATGAGATTGGAGAAGATGTGGCTCCAATTGAACACATTGCAAGCATGAGAAGGAATTATT TCACATCAGAGGTGTCTCACTGCAGAGCCACAGAATACATAATGAAGGGGGTGTACATCAATACTGCCTTACTTAATGCA TCTTGTGCAGCAATGGATGATTTCCAATTAATTCCAATGATAAGCAAGTGTAGAACTAAGGAGGGAAGGCGAAAGACCAA CTTGTATGGTTTCATCATAAAAGGAAGATCCCACTTAAGGAATGACACCGACGTGGTAAACTTTGTGAGCATGGAGTTTT CTCTCACTGACCCAAGACTTGAACCACACAAATGGGAGAAGTACTGTGTTCTTGAGATAGGAGATATGCTTCTAAGAAGT GCCATAGGCCAGGTTTCAAGGCCCATGTTCTTGTATGTGAGGACAAATGGAACCTCAAAAATTAAAATGAAATGGGGAAT GGAGATGAGGCGTTGTCTCCTCCAGTCACTTCAACAAATTGAGAGTATGATTGAAGCTGAGTCCTCTGTCAAAGAGAAAG ACATGACCAAAGAGTTCTTTGAGAACAAATCAGAAACATGGCCCATTGGAGAGTCTCCCAAAGGAGTGGAGGAAAGTTCC ATTGGGAAGGTCTGCAGGACTTTATTAGCAAAGTCGGTATTTAACAGCTTGTATGCATCTCCACAACTAGAAGGATTTTC AGCTGAATCAAGAAAACTGCTTCTTATCGTTCAGGCTCTTAGGGACAATCTGGAACCTGGGACCTTTGATCTTGGGGGGC TATATGAAGCAATTGAGGAGTGCCTAATTAATGATCCCTGGGTTTTGCTTAATGCTTCTTGGTTCAACTCCTTCCTTACA CATGCATTGAGTTAGTTGTGGCAGTGCTACTATTTGCTATCCATACTGTCCAAAAAAGTACCTTGTTTCTACT >kraken:taxid|211044|NC_002017.1 Influenza A virus (A/Puerto Rico/8/1934(H1N1)) segment 4, complete sequence AGCAAAAGCAGGGGAAAATAAAAACAACCAAAATGAAGGCAAACCTACTGGTCCTGTTATGTGCACTTGCAGCTGCAGAT GCAGACACAATATGTATAGGCTACCATGCGAACAATTCAACCGACACTGTTGACACAGTGCTCGAGAAGAATGTGACAGT GACACACTCTGTTAACCTGCTCGAAGACAGCCACAACGGAAAACTATGTAGATTAAAAGGAATAGCCCCACTACAATTGG GGAAATGTAACATCGCCGGATGGCTCTTGGGAAACCCAGAATGCGACCCACTGCTTCCAGTGAGATCATGGTCCTACATT GTAGAAACACCAAACTCTGAGAATGGAATATGTTATCCAGGAGATTTCATCGACTATGAGGAGCTGAGGGAGCAATTGAG CTCAGTGTCATCATTCGAAAGATTCGAAATATTTCCCAAAGAAAGCTCATGGCCCAACCACAACACAACCAAAGGAGTAA CGGCAGCATGCTCCCATGCGGGGAAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGAGAAGGAGGGCTCATACCCA AAGCTGAAAAATTCTTATGTGAACAAGAAAGGGAAAGAAGTCCTTGTACTGTGGGGTATTCATCACCCGTCTAACAGTAA GGATCAACAGAATATCTATCAGAATGAAAATGCTTATGTCTCTGTAGTGACTTCAAATTATAACAGGAGATTTACCCCGG AAATAGCAGAAAGACCCAAAGTAAGAGATCAAGCTGGGAGGATGAACTATTACTGGACCTTGCTAAAACCCGGAGACACA ATAATATTTGAGGCAAATGGAAATCTAATAGCACCAAGGTATGCTTTCGCACTGAGTAGAGGCTTTGGGTCCGGCATCAT CACCTCAAACGCATCAATGCATGAGTGTAACACGAAGTGTCAAACACCCCTGGGAGCTATAAACAGCAGTCTCCCTTTCC AGAATATACACCCAGTCACAATAGGAGAGTGCCCAAAATACGTCAGGAGTGCCAAATTGAGGATGGTTACAGGACTAAGG AACATTCCGTCCATTCAATCCAGAGGTCTATTTGGAGCCATTGCCGGTTTTATTGAAGGGGGATGGACTGGAATGATAGA TGGATGGTACGGTTATCATCATCAGAATGAACAGGGATCAGGCTATGCAGCGGATCAAAAAAGCACACAAAATGCCATTA ACGGGATTACAAACAAGGTGAACTCTGTTATCGAGAAAATGAACATTCAATTCACAGCTGTGGGTAAAGAATTCAACAAA TTAGAAAAAAGGATGGAAAATTTAAATAAAAAAGTTGATGATGGATTTCTGGACATTTGGACATATAATGCAGAATTGTT AGTTCTACTGGAAAATGAAAGGACTCTGGATTTCCATGACTCAAATGTGAAGAATCTGTATGAGAAAGTAAAAAGCCAAT TAAAGAATAATGCCAAAGAAATCGGAAATGGATGTTTTGAGTTCTACCACAAGTGTGACAATGAATGCATGGAAAGTGTA AGAAATGGGACTTATGATTATCCCAAATATTCAGAAGAGTCAAAGTTGAACAGGGAAAAGGTAGATGGAGTGAAATTGGA ATCAATGGGGATCTATCAGATTCTGGCGATCTACTCAACTGTCGCCAGTTCACTGGTGCTTTTGGTCTCCCTGGGGGCAA TCAGTTTCTGGATGTGTTCTAATGGATCTTTGCAGTGCAGAATATGCATCTGAGATTAGAATTTCAGAAATATGAGGAAA AACACCCTTGTTTCTACT >kraken:taxid|211044|NC_002019.1 Influenza A virus (A/Puerto Rico/8/1934(H1N1)) segment 5, complete sequence AGCAAAAGCAGGGTAGATAATCACTCACTGAGTGACATCAAAATCATGGCGTCCCAAGGCACCAAACGGTCTTACGAACA GATGGAGACTGATGGAGAACGCCAGAATGCCACTGAAATCAGAGCATCCGTCGGAAAAATGATTGGTGGAATTGGACGAT TCTACATCCAAATGTGCACAGAACTTAAACTCAGTGATTATGAGGGACGGTTGATCCAAAACAGCTTAACAATAGAGAGA ATGGTGCTCTCTGCTTTTGACGAAAGGAGAAATAAATACCTGGAAGAACATCCCAGTGCGGGGAAGGATCCTAAGAAAAC TGGAGGACCTATATACAGAAGAGTAAACGGAAAGTGGATGAGAGAACTCATCCTTTATGACAAAGAAGAAATAAGGCGAA TCTGGCGCCAAGCTAATAATGGTGACGATGCAACGGCTGGTCTGACTCACATGATGATCTGGCATTCCAATTTGAATGAT GCAACTTATCAGAGGACAAGGGCTCTTGTTCGCACCGGAATGGATCCCAGGATGTGCTCTCTGATGCAAGGTTCAACTCT CCCTAGGAGGTCTGGAGCCGCAGGTGCTGCAGTCAAAGGAGTTGGAACAATGGTGATGGAATTGGTCAGGATGATCAAAC GTGGGATCAATGATCGGAACTTCTGGAGGGGTGAGAATGGACGAAAAACAAGAATTGCTTATGAAAGAATGTGCAACATT CTCAAAGGGAAATTTCAAACTGCTGCACAAAAAGCAATGATGGATCAAGTGAGAGAGAGCCGGGACCCAGGGAATGCTGA GTTCGAAGATCTCACTTTTCTAGCACGGTCTGCACTCATATTGAGAGGGTCGGTTGCTCACAAGTCCTGCCTGCCTGCCT GTGTGTATGGACCTGCCGTAGCCAGTGGGTACGACTTTGAAAGAGAGGGATACTCTCTAGTCGGAATAGACCCTTTCAGA CTGCTTCAAAACAGCCAAGTGTACAGCCTAATCAGACCAAATGAGAATCCAGCACACAAGAGTCAACTGGTGTGGATGGC ATGCCATTCTGCCGCATTTGAAGATCTAAGAGTATTGAGCTTCATCAAAGGGACGAAGGTGGTCCCAAGAGGGAAGCTTT CCACTAGAGGAGTTCAAATTGCTTCCAATGAAAATATGGAGACTATGGAATCAAGTACACTTGAACTGAGAAGCAGGTAC TGGGCCATAAGGACCAGAAGTGGAGGAAACACCAATCAACAGAGGGCATCTGCGGGCCAAATCAGCATACAACCTACGTT CTCAGTACAGAGAAATCTCCCTTTTGACAGAACAACCGTTATGGCAGCATTCACTGGGAATACAGAGGGGAGAACATCTG ACATGAGGACCGAAATCATAAGGATGATGGAAAGTGCAAGACCAGAAGATGTGTCTTTCCAGGGGCGGGGAGTCTTCGAG CTCTCGGACGAAAAGGCAGCGAGCCCGATCGTGCCTTCCTTTGACATGAGTAATGAAGGATCTTATTTCTTCGGAGACAA TGCAGAGGAGTACGACAATTAAAGAAAAATACCCTTGTTTCTACT >kraken:taxid|211044|NC_002018.1 Influenza A virus (A/Puerto Rico/8/1934(H1N1)) segment 6, complete sequence AGCGAAAGCAGGGGTTTAAAATGAATCCAAATCAGAAAATAATAACCATTGGATCAATCTGTCTGGTAGTCGGACTAATT AGCCTAATATTGCAAATAGGGAATATAATCTCAATATGGATTAGCCATTCAATTCAAACTGGAAGTCAAAACCATACTGG AATATGCAACCAAAACATCATTACCTATAAAAATAGCACCTGGGTAAAGGACACAACTTCAGTGATATTAACCGGCAATT CATCTCTTTGTCCCATCCGTGGGTGGGCTATATACAGCAAAGACAATAGCATAAGAATTGGTTCCAAAGGAGACGTTTTT GTCATAAGAGAGCCCTTTATTTCATGTTCTCACTTGGAATGCAGGACCTTTTTTCTGACCCAAGGTGCCTTACTGAATGA CAGGCATTCAAATGGGACTGTTAAGGACAGAAGCCCTTATAGGGCCTTAATGAGCTGCCCTGTCGGTGAAGCTCCGTCCC CGTACAATTCAAGATTTGAATCGGTTGCTTGGTCAGCAAGTGCATGTCATGATGGCATGGGCTGGCTAACAATCGGAATT TCAGGTCCAGATAATGGAGCAGTGGCTGTATTAAAATACAACGGCATAATAACTGAAACCATAAAAAGTTGGAGGAAGAA AATATTGAGGACACAAGAGTCTGAATGTGCCTGTGTAAATGGTTCATGTTTTACTATAATGACTGATGGCCCGAGTGATG GGCTGGCCTCGTACAAAATTTTCAAGATCGAAAAGGGGAAGGTTACTAAATCAATAGAGTTGAATGCACCTAATTCTCAC TATGAGGAATGTTCCTGTTACCCTGATACCGGCAAAGTGATGTGTGTGTGCAGAGACAATTGGCATGGTTCGAACCGGCC ATGGGTGTCTTTCGATCAAAACCTGGATTATCAAATAGGATACATCTGCAGTGGGGTTTTCGGTGACAACCCGCGTCCCA AAGATGGAACAGGCAGCTGTGGTCCAGTGTATGTTGATGGAGCAAACGGAGTAAAGGGATTTTCATATAGGTATGGTAAT GGTGTTTGGATAGGAAGGACCAAAAGTCACAGTTCCAGACATGGGTTTGAGATGATTTGGGATCCTAATGGATGGACAGA GACTGATAGTAAGTTCTCTGTGAGGCAAGATGTTGTGGCAATGACTGATTGGTCAGGGTATAGCGGGAGTTTCGTTCAAC ATCCTGAGCTAACAGGGCTAGACTGTATAAGGCCGTGCTTCTGGGTTGAATTAATCAGGGGACGACCTAAAGAAAAAACA ATCTGGACTAGTGCGAGCAGCATTTCTTTTTGTGGCGTGAATAGTGATACTGTAGATTGGTCTTGGCCAGACGGTGCTGA GTTGCCATTCACCATTGACAAGTAGTCTGTTCAAAAAACTCCTTGTTTCTACT >kraken:taxid|211044|NC_002016.1 Influenza A virus (A/Puerto Rico/8/1934(H1N1)) segment 7, complete sequence AGCGAAAGCAGGTAGATATTGAAAGATGAGTCTTCTAACCGAGGTCGAAACGTACGTTCTCTCTATCATCCCGTCAGGCC CCCTCAAAGCCGAGATCGCACAGAGACTTGAAGATGTCTTTGCAGGGAAGAACACCGATCTTGAGGTTCTCATGGAATGG CTAAAGACAAGACCAATCCTGTCACCTCTGACTAAGGGGATTTTAGGATTTGTGTTCACGCTCACCGTGCCCAGTGAGCG AGGACTGCAGCGTAGACGCTTTGTCCAAAATGCCCTTAATGGGAACGGGGATCCAAATAACATGGACAAAGCAGTTAAAC TGTATAGGAAGCTCAAGAGGGAGATAACATTCCATGGGGCCAAAGAAATCTCACTCAGTTATTCTGCTGGTGCACTTGCC AGTTGTATGGGCCTCATATACAACAGGATGGGGGCTGTGACCACTGAAGTGGCATTTGGCCTGGTATGTGCAACCTGTGA ACAGATTGCTGACTCCCAGCATCGGTCTCATAGGCAAATGGTGACAACAACCAACCCACTAATCAGACATGAGAACAGAA TGGTTTTAGCCAGCACTACAGCTAAGGCTATGGAGCAAATGGCTGGATCGAGTGAGCAAGCAGCAGAGGCCATGGAGGTT GCTAGTCAGGCTAGGCAAATGGTGCAAGCGATGAGAACCATTGGGACTCATCCTAGCTCCAGTGCTGGTCTGAAAAATGA TCTTCTTGAAAATTTGCAGGCCTATCAGAAACGAATGGGGGTGCAGATGCAACGGTTCAAGTGATCCTCTCGCTATTGCC GCAAATATCATTGGGATCTTGCACTTGATATTGTGGATTCTTGATCGTCTTTTTTTCAAATGCATTTACCGTCGCTTTAA ATACGGACTGAAAGGAGGGCCTTCTACGGAAGGAGTGCCAAAGTCTATGAGGGAAGAATATCGAAAGGAACAGCAGAGTG CTGTGGATGCTGACGATGGTCATTTTGTCAGCATAGAGCTGGAGTAAAAAACTACCTTGTTTCTACT >kraken:taxid|211044|NC_002020.1 Influenza A virus (A/Puerto Rico/8/1934(H1N1)) segment 8, complete sequence AGCAAAAGCAGGGTGACAAAGACATAATGGATCCAAACACTGTGTCAAGCTTTCAGGTAGATTGCTTTCTTTGGCATGTC CGCAAACGAGTTGCAGACCAAGAACTAGGTGATGCCCCATTCCTTGATCGGCTTCGCCGAGATCAGAAATCCCTAAGAGG AAGGGGCAGCACTCTTGGTCTGGACATCGAGACAGCCACACGTGCTGGAAAGCAGATAGTGGAGCGGATTCTGAAAGAAG AATCCGATGAGGCACTTAAAATGACCATGGCCTCTGTACCTGCGTCGCGTTACCTAACCGACATGACTCTTGAGGAAATG TCAAGGGAATGGTCCATGCTCATACCCAAGCAGAAAGTGGCAGGCCCTCTTTGTATCAGAATGGACCAGGCGATCATGGA TAAAAACATCATACTGAAAGCGAACTTCAGTGTGATTTTTGACCGGCTGGAGACTCTAATATTGCTAAGGGCTTTCACCG AAGAGGGAGCAATTGTTGGCGAAATTTCACCATTGCCTTCTCTTCCAGGACATACTGCTGAGGATGTCAAAAATGCAGTT GGAGTCCTCATCGGAGGACTTGAATGGAATGATAACACAGTTCGAGTCTCTGAAACTCTACAGAGATTCGCTTGGAGAAG CAGTAATGAGAATGGGAGACCTCCACTCACTCCAAAACAGAAACGAGAAATGGCGGGAACAATTAGGTCAGAAGTTTGAA GAAATAAGATGGTTGATTGAAGAAGTGAGACACAAACTGAAGGTAACAGAGAATAGTTTTGAGCAAATAACATTTATGCA AGCCTTACATCTATTGCTTGAAGTGGAGCAAGAGATAAGAACTTTCTCATTTCAGCTTATTTAATAATAAAAAACACCCT TGTTTCTACT kraken2-2.1.2/data/FluA_H2N2.fa000066400000000000000000000342101404614725400156610ustar00rootroot00000000000000>kraken:taxid|488241|NC_007378.1 Influenza A virus (A/Korea/426/1968(H2N2)) segment 1, complete sequence AGCAAAAGCAGGTCAATTATATTCAATATGGAAAGAATAAAAGAACTACGGAATCTGATGTCGCAGTCTCGCACTCGCGA GATACTAACAAAAACCACAGTGGACCATATGGCCATAATTAAGAAGTACACATCAGGGAGACAGGAAAAGAACCCGTCAC TTAGGATGAAATGGATGATGGCAATGAAATATCCAATTACAGCTGACAAGAGGATAACAGAAATGGTTCCTGAGAGAAAT GAGCAAGGACAAACTCTATGGAGTAAAATGAGTGATGCCGGGTCAGATCGAGTAATGGTATCACCTTTGGCAGTGACATG GTGGAATAGAAATGGACCAATGACAAGTACGGTTCATTATCCAAAAATCTACAAGACTTATTTTGAGAAAGTCGAAAGGT TAAAACATGGAACCTTTGGCCCTGTCCATTTTAGAAACCAAGTCAAAATACGCCGAAGAGTTGACATAAACCCTGGTCAT GCAGACCTCAGTGCCAAGGAGGCACAAGACGTAATCATGGAAGTTGTTTTCCCCAATGAAGTGGGGGCCAGGATACTAAC GTCGGAATCACAATTAACAATAACCAAAGAGAAAAAAGAAGAACTCCAAGATTGCAAAATTTCTCCTTTGATGGTTGCAT ACATGTTAGAGAGAGAACTTGTCCGAAAAACGAGATTTCTCCCAGTTGCTGGTGGAACAAGCAGTGTGTACATTGAAGTG TTACACTTGACTCAAGGAACATGTTGGGAACAGATGTACACCCCAGGTGGAGAAGTGAGGAATGATGATGTTGATCAAAG TCTAATTATTGCAGCCAGGAACATAGTGAGAAGAGCAGCAGTATCAGCAGATCCACTAGCATCTTTATTGGAGATGTGCC ACAGCACACAGATTGGCGGGACAAGGATGGTGGACATTCTTAGGCAGAACCCAACGGAAGAACAAGCTGTGGATATATGC AAGGCTGCAATGGGACTGAGAATCAGCTCATCCTTCAGTTTTGGCGGGTTCACATTTAAGAGAACAAGCGGGTCATCAAT CAAGAGAGAGGAAGAAGTGCTTACGGGCAATCTCCAAACATTGAAAATAAGGGTGCATGAGGGGTACGAGGAATTCACAA TGGTGGGGAAAAGGGCAACAGCTATACTCAGAAAAGCAACCAGGAGATTGGTTCAGCTGATAGTGAGTGGAAGAGACGAA CAGTCAATAGCCGAAGCAATAATTGTAGCCATGGTGTTTTCACAAGAAGATTGCATGATAAAAGCAGTTAGAGGTGACCT GAATTTCGTTAATAGGGCAAATCAGCGATTGAATCCCATGCATCAACTTTTAAGACATTTTCAGAAAGATGCAAAAGTGC TCTTTCAAAATTGGGGAATTGAACATATCGACAATGTAATGGGAATGATTGGAGTATTACCAGACATGACTCCAAGCACA GAGATGTCAATGAGAGGGATAAGAGTCAGCAAAATGGGCGTGGATGAATACTCCAGCACAGAGAGGGTAGTGGTAAGCAT TGACCGGTTTTTGAGAGTTCGAGACCAACGAGGAAATGTACTACTATCTCCTGAGGAGGTCAGTGAAACACAGGGGACAG AGAAACTGACAATAACTTACTCATCGTCAATGATGTGGGAGATTAATGGCCCTGAGTCAGTGTTGGTCAATACCTATCAG TGGATCATCAGAAACTGGGAAACTGTTAAAATTCAATGGTCTCAGAATCCTACAATGCTATACAATAAAATGGAATTTGA GCCATTTCAGTCTTTAGTTCCTAAGGCCATTAGAGGCCAATACAGTGGATTTGTTAGGACTCTATTCCAACAAATGAGGG ATGTACTTGGGACATTTGATACCACCCAGATAATAAAGCTTCTTCCCTTTGCAGCCGCCCCACCAAAGCAAAGTAGAATG CAGTTCTCTTCATTGACTGTGAATGTGAGGGGATCAGGAATGAGAATACTTGTAAGGGGCAATTCTCCTGTATTCAACTA CAACAAGACCACTAAGAGACTAACAATTCTCGGAAAGGATGCTGGCACTTTAACTGAAGACCCAGATGAAGGCACATCCG GAGTGGAGTCCGCTGTTCTGAGAGGATTCCTCATTCTGGGCAAGGAAGATAGAAGATATGGACCAGCATTAAGCATCAAT GAACTGAGTACCCTTGCAAAAGGAGAAAAGGCTAATGTACTAATTGGGCAAGGAGACGTGGTGTTGGTAATGAAACGAAA ACGGGACTCTAGCATACTTACTGACAGCCAGACAGCGACCAAAAGAATTCGGATGGCCATCAATTAATGTTGAATAGTTT AAAAACGACCTTGTTTCTACT >kraken:taxid|488241|NC_007375.1 Influenza A virus (A/Korea/426/1968(H2N2)) segment 2, complete sequence AGCAAAAGCAGGCAAACCATTTGAATGGATGTCAATCCGACCTTACTTTTCTTGAAAGTTCCAGCGCAAAATGCCATAAG TACTACATTCCCTTATACTGGAGATCCTCCATACAGCCATGGGACAGGAACAGGATACACCATGGACACAGTCAACAGAA CACATCAATATTCAGAAAAGGGGAAGTGGACAACAAACACGGAAACTGGAGCGCCCCAACTTAACCCAATTGATGGACCA CTACCTGAGGACAATGAACCAAGTGGATATGCACAAACAGACTGCGTCCTGGAAGCAATGGCTTTCCTTGAGGAATCACA CCCAGGAATCTTTGAAAATTCGTGTCTTGAAACGATGGAAGTTATTCAACAAACAAGAGTGGACAAACTGACCCAAGGTC GTCAGACCTATGACTGGACATTGAACAGAAATCAGCCGGCTGCAACTGCGCTAGCCAACACTATAGAGGTCTTCAGATCG AATGGACTGACAGCTAATGAGTCGGGAAGGCTAATAGATTTCCTCAAGGATGTGATAGAATCAATGGATAAAGAGGAGAT GGAAATAACAACACACTTCCAAAGAAAAAGAAGAGTAAGAGACAACATGACCAAGAAAATGGTCACACAACGAACAATAG GAAAGAAGAAGCAAAGATTGAACAAGAGAAGCTATCTGATAAGAGCACTGACATTGAACACAATGACTAAAGATGCAGAG AGAGGTAAATTAAAAAGAAGAGCAATTGCAACACCCGGTATGCAGATCAGAGGGTTCGTGCACTTTGTCGAAACACTAGC GAGAAATATTTGTGAGAAACTTGAACAGTCTGGGCTTCCGGTTGGAGGTAATGAAAAGAAGGCTAAACTAGCAAATGTTG TTAGAAAAATGATGACTAATTCACAAGACACAGAGCTCTCTTTCACAATTACTGGAGACAACACCAAATGGAATGAGAAT CAAAATCCTCGAGTGTTTCTGGCGATGATAACATACATCACAAGAAATCAACCTGAATGGTTTAGAAACGTCCTGAGCAT TGCACCCATAATGTTCTCAAATAAAATGGCTAGACTAGGGAAAGGTTACATGTTCGAAAGCAAGAGCATGAAGCTCCGAA CACAAATACCAGCAGAAATGCTAGCAAGTATTGACCTGAAATACTTTAATGAATCAACCAGAAAGAAAATTGAGAAAATA AGGCCTCTCCTAATAGATGGCACAGTCTCATTGAGTCCTGGAATGATGATGGGCATGTTCAACATGCTAAGTACAGTCTT AGGAGTCTCAATCCTGAATCTCGGGCAAAAGAAATACACCAAAACNACATACTGGTGGGACGGACTCCAATCCTCTGATG ACTTCGCTCTCATAGTGAATGCACCAAATCATGAGGGAATACAAGCAGGGGTGAATAGATTCTACAGAACCTGCAAGCTA GTCGGAATCAATATGAGCAAAAAGAAGTCCTACATAAATAGGACAGGGACATTTGAATTCACAAGCTTTTTCTATCGCTA TGGATTTGTAGCCAATTTTAGCATGGAGCTGCCCAGCTTTGGAGTGTCTGGAATTAATGAATCGGCTGATATGAGCATTG GGGTAACAGTGATAAAGAACAATATGATAAATAATGACCTTGGGCCAGCAACAGCCCAAATGGCTCTTCAACTATTCATC AAAGACTACAGATACACGTACCGGTGCCACAGAGGGGACACACAAATTCAGACAAGGAGATCATTCGAGCTAAAGAAGCT GTGGGAGCAAACCCGCTCAAAGGCAGGACTTTTGGTGTCGGATGGAGGATCAAACTTATACAATATCCGGAATCTCCACA TTCCAGAAGTCTGCTTGAAATGGGAGCTAATGGATGAAGACTATCAGGGGAGGCTTTGTAATCCCCTGAATCCATTTGTC AGTCATAAGGAAATTGAGTCTGTAAACAATGCTGTGGTAATGCCAGCTCACGGTCCAGCCAAGAGCATGGAATATGATGC TGTTGCTACTACACACTCCTGGACCCCTAAGAGGAACCGCTCCATTCTCAACACAAGCCAAAGGGGAATTCTTGAAGATG AACAGATGTATCAGAAGTGTTGCAATCTATTTGAGAAATTCTTCCCTAGCAGTTCGTACAGGAGACCAGTTGGAATTTCC AGCATGGTGGAGGCCATGGTGTCTAGGGCTCGGATTGATGCACGGATTGACTTCGAGTCTGGACGGATTAAGAAAGAGGA GTTCGCTGAGATCATGAAGATCTGTTCCACCATTGAAGAGCTCAGACGGCAAAAATAGTGAATTTAGCTTGTCCTTCATG AAAAAATGCCTTGTTTCTACT >kraken:taxid|488241|NC_007376.1 Influenza A virus (A/Korea/426/1968(H2N2)) segment 3, complete sequence AGCAAAAGCAGGTACTGATTCGAAATGGAAGATTTTGTGCGACAATGCTTCAATCCGATGATTGTCGAACTTGCGGAAAA GGCAATGAAAGAGTATGGAGAAGATCTGAAAATCGAAACAAACAAATTTGCAGCAATATGCACTCACTTGGAAGTATGCT TCATGTATTCAGATTTTCATTTCATCAATGAGCAAGGCGAGTCAATAATGGTAGAGCTTGATGATCCAAATGCACTTTTG AAGCACAGATTTGAAATAATAGAGGGAAGAGATCGCACAATGGCCTGGACAGTAGTAAACAGTATTTGCAACACCACAGG AGCTGAGAAACCGAAGTTTCTGCCAGATTTGTATGATTACAAGGAGAATAGATTCATCGAGATTGGAGTGACAAGGAGAG AAGTCCACATATACTATCTTGAAAAGGCCAATAAAATTAAATCTGAGAATACACACATCCACATTTTCTCATTCACTGGG GAAGAAATGGCCACAAAGGCCGACTACACTCTCGATGAGGAAAGCAGGGCTAGGATCAAAACCAGACTATTCACCATAAG ACAAGAAATGGCCAACAGAGGCCTCTGGGATTCCTTTCGTCAGTCCGAAAGAGGCGAAGAAACAATTGAAGAAAGATTTG AAATCACAGGGACAATGCGCAGGCTTGCCGACCAAAGTCTCCCGCCGAACTTCTCCTGCCTTGAGAATTTTAGAGCCTAT GTGGATGGATTCGAACCGAACGGCTACATTGAGGGCAAGCTTTCTCAAATGTCCAAAGAAGTAAATGCAAAAATTGAACC TTTTCTGAAAACAACACCAAGACCAATTAGACTTCCGGATGGGCCTCCTTGTTTTCAGCGGTCCAAATTCCTGCTGATGG ATGCTTTAAAATTAAGCATTGAGGACCCAAGTCACGAAGGGGAGGGAATACCACTATATGATGCGATCAAGTGCATGAGA ACATTCTTTGGATGGAAAGAACCCTATATTGTTAAACCACACGAAAAGGGAATAAATCCAAATTATCTGCTGTCATGGAA GCAAGTACTGGCGGAACTGCAGGACATTGAGAATGAGGAGAAGATTCCAAGAACTAAAAACATGAAGAAAACGAGTCAGC TAAAGTGGGCACTTGGTGAGAACATGGCACCAGAGAAGGTAGACTTTGACAACTGTAGAGACATAAGCGATTTGAAGCAA TATGATAGTGACGAACCTGAATTAAGGTCACTTTCAAGCTGGATCCAGAATGAGTTCAACAAGGCATGCGAGCTGACCGA TTCAATCTGGATAGAGCTCGATGAGATTGGAGAAGACGTGGCTCCAATTGAACACATTGCAAGCATGAGAAGGAATTACT TCACAGCAGAGGTGTCCCATTGCAGAGCCACAGAATATATAATGAAGGGGGTATACATTAATACTGCCTTGCTTAATGCA TCCTGTGCAGCAATGGACGATTTCCAACTAATTCCCATGATAAGCAAGTGTAGAACTAAAGAGGGAAGGCGAAAGACCAA TTTATATGGTTTCATCATAAAAGGAAGATCTCACTTAAGGAATGACACCGACGTGGTAAACTTTGTGAGCATGGAGTTTT CTCTCACTGACCCGAGACTTGAGCCACACAAATGGGAGAAGTACTGTGTCCTTGAGATAGGAGATATGCTACTAAGAAGT GCCATAGGCCAGATGTCAAGGCCTATGTTCTTGTATGTGAGGACAAATGGAACATCAAAGATTAAAATGAAATGGGGAAT GGAGATGAGGCCTTGCCTCCTTCAGTCACTACAACAAATCGAGAGTATGGTTGAAGCCGAGTCCTCTGTCAAAGAGAAAG ACATGACCAAAGAGTTTTTTGAGAATAAATCAGAAACATGGCCCATTGGGGAGTCCCCCAAAGGAGTGGAAGAAGGTTCC ATTGGGAAGGTCTGCAGGACTTTATTAGCCAAGTCGGTATTCAATAGCCTGTATGCATCCCCACAATTAGAAGGATTTTC AGCTGAATCAAGAAAACTGCTTCTTGTCGTTCAGGCTCTTAGGGACAATCTTGAACCTGGAACCTTTGATCTTGGGGGGC TATATGAAGCAATTGAGGAGTGCCTGATTAATGATCCCTGGGTTTTGCTTAATGCGTCTTGGTTCAACTCCTTCCTAACA CATGCATTAAGATAGTTGTGGCAATGCTACTATTTGCTATCCATACTGTCCAAAAAAGTACCTTGTTTCTACT >kraken:taxid|488241|NC_007374.1 Influenza A virus (A/Korea/426/1968(H2N2)) segment 4, complete sequence AGCAAAAGCAGGGGTTATACCATAGACAACCAAAAGCATAACAATGGCCATCATTTATCTCATACTCCTGTTCACAGCAG TGAGGGGGGACCAGATATGCATTGGATACCATGCCAATAATTCCACAGAAAAGGTCGACACAATTCTAGAGCGGAATGTC ACTGTGACTCATGCCAAGGACATCCTTGAGAAGACCCATAACGGAAAGCTATGCAAACTAAACGGAATCCCTCCACTTGA ACTAGGGGACTGTAGCATTGCCGGATGGCTCCTTGGAAATCCAGAATGTGATAGGCTTCTAAGTGTGCCAGAATGGTCCT ATATAATGGAGAAAGAAAACCCGAGATACAGTTTGTGTTACCCAGGCAGCTTCAATGACTATGAAGAATTGAAACATCTC CTCAGCAGCGTGAAACATTTTGAGAAAGTTAAGATTTTGCCCAAAGATAGATGGACACAGCATACAACAACTGGAGGTTC ATGGGCCTGCGCGGTGTCAGGTAAACCATCATTCTTCAGGAACATGGTCTGGCTGACACGTAAAGGATCAAATTATCCGG TTGCCAAAGGATCGTACAACAATACAAGCGGAGAACAAATGCTAATAATTTGGGGAGTGCACCATCCTAATGATGAGGCA GAACAAAGAGCATTGTACCAGAATGTGGGAACCTATGTTTCCGTAGCCACATCAACATTGTACAAAAGGTCAATCCCAGA AATAGCAGCAAGGCCTAAAGTGAATGGACTAGGACGTAGAATGGAATTCTCTTGGACCCTCTTGGATATGTGGGACACCA TAAATTTTGAGAGCACTGGTAATCTAGTTGCACCAGAGTATGGGTTCAAAATATCGAAAAGAGGTAGTTCAGGGATCATG AAGACAGAAGGAACACTTGAGAACTGTGAAACCAAATGCCAAACTCCTTTGGGAGCAATAAATACAACACTACCTTTTCA CAATGTCCACCCACTGACAATAGGTGAATGCCCCAAATATGTAAAATCGGAGAAATTGGTCTTAGCAACAGGACTAAGGA ATGTTCCCCAGATTGAATCAAGAGGATTGTTTGGGGCAATAGCTGGTTTTATAGAAGGAGGATGGCAAGGAATGGTTGAT GGTTGGTATGGATACCATCACAGCAATGACCAGGGATCAGGGTATGCAGCAGACAAAGAATCCACTCAAAAGGCATTTAA TGGAATCACCAACAAGGTAAATTCTGTGATTGAAAAGATGAACACCCAATTTGAAGCTGTTGGGAAAGAATTCAGTAACT TAGAGAAAAGACTGGAGAACTTGAACAAAAAGATGGAAGACGGGTTTCTAGATGTGTGGACATACAATGCAGAGCTTCTA GTTCTGATGGAAAATGAGAGGACACTTGACTTTCATGATTCTAATGTCAAGAATCTGTATGATAAAGTCAGAATGCAGCT GAGAGACAACGTCAAAGAACTAGGAAATGGATGTTTTGAATTTTATCACAAATGTGACAATGAATGCATGGATAGTGTGA AAAACGGGACATATGATTATCCCAAGTATGAAGAAGAATCTAAACTAAATAGAAATGAAATCAAAGGGGTAAAATTGAGC AGCATGGGGGTTTATCAAATCCTTGCCATTTATGCTACAGTAGCAGGTTCTCTGTCACTGGCAATCATGATGGCTGGGAT CTCTTTCTGGATGTGCTCCAACGGGTCTCTGCAGTGCAGAATCTGCATATGATTGTAAGTCATTTTATAATTAAAAACAC CCTTGTTTCCTGA >kraken:taxid|488241|NC_007381.1 Influenza A virus (A/Korea/426/1968(H2N2)) segment 5, complete sequence ATGGCGTCCCAAGGCACCAAACGGTCTTATGAACAGATGGAAACTGATGGGGAACGCCAGAATGCAACTGAGATCAGAGC ATCCGTCGGGAAGATGATTGATGGAATTGGACGATTCTACATCCAAATGTGCACCGAACTTAAACTCAGTGATTATGAGG GGCGACTGATCCAGAACAGCTTAACAATAGAGAGAATGGTGCTCTCTGCTTTTGACGAGAGAAGGAATAAATATCTGGAA GAACATCCCAGCGCGGGGAAGGATCCTAAGAAAACTGGAGGACCCATATACAAGAGAGTAGATGGAAAGTGGATGAGGGA ACTCGTCCTTTATGACAAAGAAGAAATAAGGCGAATCTGGCGCCAAGCCAATAATGGTGATGATGCAACAGCTGGGCTGA CTCACATGATGATCTGGCATTCCAATTTGAATGATACAACATACCAGAGGACAAGAGCTCTTGTTCGCACCGGAATGGAT CCCAGGATGTGCTCTTTGATGCAGGGTTCGACTCTCCCTAGGAGGTCTGGAGCTGCAGGCGCTGCAGTCAAAGGAGTTGG GACAATGGTGATGGAGTTGATCAGGATGATCAAACGTGGGATCAATGATCGGAACTTCTGGAGAGGTGAGAATGGACGGA AAACAAGGAGTGCTTACGAGAGAATGTGCAACATTCTCAAAGGAAAATTTCAAACAGCTGCACAAAGAGCAATGATGGAT CAAGTGAGAGAAAGCCGGAACCCAGGAAATGCTGAGATCGAAGATCTAATCTTTCTGGCACGGTCTGCACTCATATTGAG AGGGTCAGTTGCTCACAAATCTTGTCTGCCCGCCTGTGTGTATGGACCTGCCATAGCCAGTGGGTACAACTTCGAAAAAG AGGGATACTCTCTAGTGGGAATAGACCCTTTCAAACTGCTTCAAAACAGCCAAGTATACAGCCTAATCAGACCGAACGAG AATCCAGCACACAAGAGTCAGCTGGTGTGGATGGCATGCAATTCTGCTGCATTTGAAGATCTAAGAGTATTAAGCTTCAT CAGAGGGACCAAAGTATCCCCAAGGGGGAAACTTTCCACTAGAGGAGTACAAATTGCTTCAAATGAAAACATGGATACTA TGGAATCAAGTACTCTTGAACTAAGAAGCAGGTACTGGGCCATAAGGACCAGAAGTGGAGGAAACACTAATCAACAGAGG GCCTCTGCAGGTCAAATCAGTGTACAACCTGCATTTTCTGTGCAAAGAAACCTCCCATTTGACAAACCAACCATCATGGC AGCATTCACTGGGAATACAGAGGGAAGAACATCAGACATGAGGGCAGAAATCATAAGGATGATGGAAGGTGCAAAACCAG AAGAAATGTCCTTCCAGGGGCGGGGAGTCTTCGAGCTCTCGGACGAAAAGGCAACGAACCCGATCGTGCCCTCTTTTGAC ATGAGTAATGAAGGATCTTATTTCTTCGGAGACAATGCAGAGGAGTACGACAATTAA >kraken:taxid|488241|NC_007382.1 Influenza A virus (A/Korea/426/1968(H2N2)) segment 6, complete sequence ATGAATCCAAATCAAAAGATAATAACAATTGGCTCTGTCTCTCTCACCATTGCAACAGTATGCTTCCTCATGCAGATTGC CATCCTGGTAACTACTGTGACATTGCATTTTAAGCAACATGAGTGCGACTCCCCCGCGAGCAACCAAGTAATGCCGTGTG AACCAATAATAATAGAAAGGAACATAACAGAGATAGTGTATTTGAATAACACCACCATAGAGAAAGAGATCTGCCCCGAA GTAGTGGAATACAGAAATTGGTCAAAGCCGCAATGTCAAATTACAGGATTTGCACCTTTTTCTAAGGACAATTCAATCCG GCTTTCTGCTGGTGGGGACATTTGGGTGACGAGAGAACCTTATGTGTCATGCGATCCTGGCAAGTGTTATCAATTTGCAC TCGGGCAGGGGACCACACTAGACAACAAACATTCAAATGACACAATACATGATAGAATCCCTCATCGAACCCTATTAATG AATGAGTTGGGTGTTCCATTTCATTTAGGAACCAGGCAAGTGTGTGTAGCATGGTCCAGCTCAAGTTGTCACGATGGAAA AGCATGGTTGCATGTTTGTGTCACTGGGGATGATAAAAATGCAACTGCTAGCTTCATTTATGACGGGAGGCTTATGGACA GTATTGGTTCATGGTCTCAAAATATCCTCAGGACCCAGGAGTCGGAATGCGTTTGTATCAATGGGACTTGCACAGTAGTA ATGACTGATGGAAGTGCTTCAGGAAGAGCCGATACTAGAATACTATTCATTGAAGAGGGGAAAATTGTCCATATTAGCCC ATTGTCAGGAAGTGCTCAGCATGTAGAGGAGTGTTCCTGTTATCCTCGATATCCTGACGTCAGATGTATCTGCAGAGACA ACTGGAAAGGCTCTAATAGGCCCGTCATAGACATAAATATGGAAGATTATAGCATTGATTCCAGTTATGTGTGCTCAGGG CTTGTTGGCGACACACCCAGAAACGACGACAGATCTAGCAATAGTAATTGCAGGAATCCTAACAATGAGAGAGGGAATCC AGGAGTGAAAGGCTGGGCCTTTGACAATGGAGATGACGTGTGGATGGGAAGAACGATCAGCAAGGATTTACGCTCAGGTT ATGAAACTTTCAAAGTCATTGGTGGTTGGTCCACACCTAATTCCAAATCGCAGATCAATAGACAGGTCATAGTTGACAGC AATAATTGGTCAGGTTACTCTGGTATTTTCTCTGTTGAGGGCAAAAGATGCATCAATAGGTGCTTTTATGTGGAGTTGAT AAGGGGAAGGCAACAGGAGACTAGAGTATGGTGGACCTCAAACAGTATTGTTGTGTTTTGTGGCACTTCAGGTACTTATG GAACAGGCTCATGGCCTGATGGGGCGAACATCAATTTCATGCCTATATAA >kraken:taxid|488241|NC_007377.1 Influenza A virus (A/Korea/426/1968(H2N2)) segment 7, complete sequence AGCAAAAGCAGGTAGATATTGAAAGATGAGCCTTCTAACCGAGGTCGAAACGTACGTTCTCTCTATCGTCCCGTCAGGCC CCCTCAAAGCCGAGATCGCACAGAGACTTGAAGATGTCTTTGCTGGGAAGAACACAGATCTTGAGGCTCTCATGGAATGG CTAAAGACAAGACCAATCCTGTCACCTCTGACTAAGGGGATTTTGGGATTTGTATTCACGCTCACCGTGCCAAGTGAGCG AGGACTGCAGCGTAGACGCTTTGTCCAAAATGCCCTCAATGGGAATGGGGATCCAAATAACATGGACAGAGCAGTTAAAC TGTATAGAAAGCTTAAGAGGGAGATAACATTCCATGGGGCCAAAGAAGTAGCGCTCAGTTATTCTGCTGGTGCACTTGCC AGTTGCATGGGCCTCATATACAACAGGATGGGGGCTGTGACCACTGAAGTGGCCTTTGCCGTGGTATGTGCAACCTGTGA ACAGATTGCTGACTCCCAGCATAGGTCTCACAGGCAAATGGTGACAACAACCAATCCACTAATAAGACATGAGAACAGAA TGGTTCTGGCCAGCACTACAGCTAAGGCTATGGAGCAAATGGCTGGATCGAGTGAGCAAGCAGCAGAGGCCATGGAGGTT GCTAGTCAGGCCAGGCAAATGGTGCAGGCAATGAGAGCCATTGGGACTCCTCCTAGCTCCAGTGCTGGTCTAAAAGATGA TCTTCTTGAAAATTTGCAGGCCTATCAGAAACGAATGGGGGTGCAGATGCAACGATTCAAGTGACCCCCTTGTTGTTGCT GCGAGTATCATTGGGATCTTGCACTTTATATTGTGGATTCTTGATCGTCTTTTTTTCAAATGCATTTATCGCTTCTTTAA ACACGGTCTGAAAAGAGGGCCTTCTACGGAAGGAGTACCTGAGTCTATGAGGGAAGAATATCGAAAGGAACAGCAGAGTG CTGTGGATGCTGACGATAGTCATTTTGTCAGCATAGAGCTGGAGTAAAAAACTACCTTGTTTCTACT >kraken:taxid|488241|NC_007380.1 Influenza A virus (A/Korea/426/1968(H2N2)) segment 8, complete sequence ATGGATTCTAACACTGTGTCAAGTTTTCAGGTAGATTGCTTCCTTTGGCATGTCCGAAAACAAGTTGTAGACCAAGAACT AGGTGATGCCCCATTCCTTGATCGGCTTCGCCGAGATCAGAAGTCCCTAAGGGGAAGAGGCAGCACTCTCGATCTAGACA TCGAAGCAGCCACCCGTGTTGGAAAGCAGATAGTAGAGAGGATTCTGAAGGAAGAATCCGATGAGGCACTTAAAATGACC ATGGCCTCCGCACCTGCTTCGCGATACCTAACTGACATGACTATTGAGGAATTGTCAAGGGACTGGTTCATGCTAATGCC CAAGCAGAAAGTGGAAGGCCCTCTTTGCATCAGAATAGACCAGGCAATCATGGATAAGAACATCATGTTGAAAGCGAATT TCAGTGTGATTTTTGACCGGCTAGAGACCCTAATATTACTAAGGGCTTTCACCGAAGAGGGAGCAATTGTTGGCGAAATT TCACCATTGCCTTCTCTTCCAGGACATACTATTGAGGATGTCAAAAATGCAATTGGGGTCCTCATCGGAGGACTTGAATG GAATGATAACACAGTTCGAGTCTCTAAAACTCTACAGAGATTCGCTTGGAGAAGCAGTAATGAGAATGGGAGACCTCCAC TCACTCCAAAACAGAAACGGAAAATGGCGAGAACAATTAGGTCAAAAGTTCGAAGAGATAAGATGGCTGATTGAAGAAGT GAGACACAGATTGAAGATAACAGAGAATAGTTTTGAGCAAATAACATTTATGCAAGCCTTACAGCTACTATTTGAAGTGG AACAAGAGATAAGAACTTTCTCGTTTCAGCTTATTTAA kraken2-2.1.2/data/FluA_H3N2.fa000066400000000000000000000345121404614725400156670ustar00rootroot00000000000000>kraken:taxid|335341|NC_007373.1 Influenza A virus (A/New York/392/2004(H3N2)) segment 1, complete sequence AGCAAAAGCAGGTCAATTATATTCAGTATGGAAAGAATAAAAGAACTACGGAACCTGATGTCGCAGTCTCGCACTCGCGA GATACTGACAAAAACCACAGTGGACCATATGGCCATAATTAAGAAGTACACATCGGGGAGACAGGAAAAGAACCCGTCAC TTAGGATGAAATGGATGATGGCAATGAAATACCCAATCACTGCTGACAAAAGGATAACAGAAATGGTTCCGGAGAGAAAT GAACAAGGACAAACTCTATGGAGTAAAATGAGTGATGCTGGATCAGATCGAGTGATGGTATCACCTTTGGCTGTAACATG GTGGAATAGAAATGGACCCGTGGCAAGTACGGTCCATTACCCAAAAGTATACAAGACTTATTTTGACAAAGTCGAAAGGT TAAAACATGGAACCTTTGGCCCTGTTCATTTTAGAAATCAAGTCAAGATACGCAGAAGAGTAGACATAAACCCTGGTCAT GCAGACCTCAGTGCCAAAGAGGCACAAGATGTAATTATGGAAGTTGTTTTTCCCAATGAAGTGGGAGCCAGGATACTAAC ATCAGAATCGCAATTAACAATAACTAAAGAGAAAAAAGAAGAACTCCGAGATTGCAAAATTTCTCCCTTGATGGTTGCAT ACATGTTAGAGAGAGAACTTGTCCGAAAAACAAGATTTCTCCCAGTTGCTGGCGGAACAAGCAGTATATACATTGAAGTC TTACATTTGACTCAAGGAACGTGTTGGGAACAAATGTACACTCCAGGTGGAGAAGTGAGGAATGACGATGTTGACCAAAG CCTAATTATTGCGGCCAGGAACATAGTAAGAAGAGCTGCAGTATCAGCAGATCCACTAGCATCTTTATTGGAGATGTGCC ACAGCACACAAATTGGCGGGACAAGGATGGTGGACATTCTTAGACAGAACCCGACTGAAGAACAAGCTGTGGATATATGC AAGGCTGCAATGGGATTGAGAATCAGCTCATCCTTCAGCTTTGGTGGGTTTACATTTAAAAGAACAAGCGGGTCATCAGT CAAAAAAGAGGAAGAAGTGCTTACAGGCAATCTCCAAACATTGAAGATAAGAGTACATGAGGGGTATGAGGAGTTCACAA TGGTGGGGAAAAGAGCAACAGCTATACTCAGAAAAGCAACCAGAAGATTGGTTCAGCTCATAGTGAGTGGAAGAGACGAA CAGTCAATAGCCGAAGCAATAATCGTGGCCATGGTGTTTTCACAAGAGGATTGCATGATAAAAGCAGTTAGAGGTGACCT GAATTTCGTCAACAGAGCAAATCAACGGTTGAACCCCATGCATCAGCTTTTAAGGCATTTTCAGAAAGATGCGAAAGTGC TTTTTCAAAATTGGGGAATTGAACACATCGACAGTGTGATGGGAATGGTTGGAGTATTACCAGATATGACTCCAAGCACA GAGATGTCAATGAGAGGAATAAGAGTCAGCAAAATGGGTGTGGATGAATACTCCAGTACAGAGAGGGTGGTGGTTAGCAT TGATCGGTTTTTGAGAGTTCGAGACCAACGCGGGAATGTATTATTGTCTCCTGAGGAGGTCAGTGAAACACAGGGAACTG AAAGATTGACAATAACATATTCATCGTCGATGATGTGGGAGATTAACGGTCCTGAGTCGGTTTTGGTCAATACCTATCAA TGGATCATCAGAAATTGGGAAGCTGTCAAAATTCAATGGTCTCAGAATCCTGCAATGTTGTACAACAAAATGGAATTTGA ACCATTTCAATCTTTAGTCCCCAAGGCCATTAGAAGCCAATACAGTGGGTTTGTCAGAACTCTATTCCAACAAATGAGAG ACGTACTTGGGACATTTGACACCACCCAGATAATAAAGCTTCTCCCTTTTGCAGCCGCTCCACCAAAGCAAAGCAGAATG CAGTTCTCTTCACTGACTGTAAATGTGAGGGGATCAGGGATGAGAATACTTGTAAGGGGCAATTCTCCTGTATTCAACTA CAACAAGACCACTAAAAGACTAACAATTCTCGGAAAAGATGCCGGCACTTTAATTGAAGACCCAGATGAAAGCACATCCG GAGTGGAGTCCGCCGTCTTGAGAGGGTTTCTCATTATAGGTAAGGAAGACAGAAGATACGGACCAGCATTAAGCATCAAT GAACTGAGTAACCTTGCAAAAGGGGAAAAGGCTAATGTGCTAATCGGGCAAGGAGACGTGGTGTTGGTAATGAAACGAAA ACGGGACTCTAGCATACTTACTGACAGCCAGACAGCGACCAAAAGAATTCGGATGGCCATCAATTAATGTTGAATAGTTT AAAAACGACCTTGTTTCTACT >kraken:taxid|335341|NC_007372.1 Influenza A virus (A/New York/392/2004(H3N2)) segment 2, complete sequence AGCAAAAGCAGGCAAACCATTTGAATGGATGTCAATCCGACTCTACTGTTCCTAAAGGTTCCAGCGCAAAATGCCATAAG CACCACATTCCCTTATACTGGAGATCCTCCATACAGCCATGGAACAGGAACAGGATACACCATGGACACAGTCAACAGAA CACACCAATATTCAGAGAAGGGGAAGTGGACGACAAATACAGAAACTGGGGCACCCCAACTCAACCCAATTGATGGACCA CTACCTGAGGATAATGAGCCAAGTGGATATGCACAAACAGACTGTGTCCTGGAGGCTATGGCCTTCCTTGAAGAATCCCA CCCAGGTATCTTTGAGAACTCATGCCTTGAAACAATGGAAGTCGTTCAACAAACAAGGGTGGACAAACTAACCCAAGGCC GCCAGACTTATGATTGGACATTAAACAGAAATCAACCGGCAGCAACTGCATTAGCCAACACCATAGAAGTTTTTAGATCG AATGGACTAACAGCCAATGAATCAGGAAGGCTAATAGATTTCCTCAAGGATGTGATGGAATCAATGGATAAAGAGGAAAT GGAGATAACAACACACTTTCAAAGAAAAAGGAGAGTAAGAGACAACATGACCAAGAAAATGGTCACACAAAGAACAATAG GGAAGAAAAAACAAAGAGTGAATAAGAGAGGCTATCTAATAAGAGCTTTGACATTGAACACGATGACCAAAGATGCAGAG AGAGGTAAATTAAAAAGAAGGGCTATTGCAACACCCGGGATGCAAATTAGAGGGTTCGTGTACTTCGTTGAAACTTTAGC TAGAAGCATTTGCGAAAAGCTTGAACAGTCTGGACTTCCGGTTGGGGGTAATGAAAAGAAGGCCAAACTGGCAAATGTTG TGAGAAAAATGATGACTAATTCACAAGACACTGAGCTTTCTTTCACAATCACTGGGGACAACACTAAGTGGAATGAAAAT CAAAACCCTCGAATGTTTTTGGCGATGATTACATATATCACAAAAAATCAACCTGAGTGGTTCAGAAACATCCTGAGCAT CGCACCAATAATGTTCTCAAACAAAATGGCAAGACTAGGAAAAGGATACATGTTCGAGAGTAAGAGAATGAAGCTCCGAA CACAAATACCCGCAGAAATGCTAGCAAGCATTGACCTGAAGTATTTCAATGAATCAACAAGGAAGAAAATTGAGAAAATA AGGCCTCTTCTAATAGATGGCACAGCATCATTGAGCCCTGGGATGATGATGGGCATGTTCAACATGCTAAGTACGGTTTT AGGAGTCTCGGTACTGAATCTTGGGCAAAAGAAATACACCAAGACAACATACTGGTGGGATGGGCTCCAATCCTCCGACG ATTTTGCCCTCATAGTGAATGCACCAAATCATGAGGGAATACAAGCAGGAGTGGATAGATTCTACAGGACCTGCAAGTTA GTGGGAATCAACATGAGCAAAAAGAAGTCCTATATAAATAAAACAGGGACATTTGAATTCACAAGCTTTTTTTATCGATA TGGATTTGTGGCTAATTTTAGCATGGAGCTTCCCAGTTTTGGAGTGTCTGGAATAAACGAGTCAGCTGATATGAGTATTG GAGTAACAGTGATAAAGAACAACATGATAAACAATGACCTTGGGCCAGCAACAGCCCAGATGGCTCTCCAATTGTTCATC AAAGACTACAGATATACATATAGGTGCCATAGAGGAGACACACAAATTCAGACGAGAAGATCATTCGAGCTAAAGAAGCT GTGGGATCAAACCCAATCAAGGGCAGGACTATTGGTATCAGATGGGGGACCAAACTTATACAATATCCGGAACCTTCACA TCCCTGAAGTCTGCTTAAAGTGGGAGCTAATGGATGAGAATTATCGGGGAAGACTTTGTAACCCCCTGAATCCCTTTGTC AGCCATAAAGAAATTGAGTCTGTAAACAATGCTGTAGTGATGCCAGCCCACGGTCCAGCCAAAAGTATGGAATATGATGC CGTTGCAACTACACACTCCTGGAATCCCAAGAGGAACCGCTCTATTCTAAACACTAGCCAAAGGGGAATTCTTGAGGATG AACAGATGTACCAAAAGTGCTGCAACTTGTTCGAGAAATTTTTCCCTAGTAGTTCATATAGGAGACCGATTGGAATTTCT AGCATGGTGGAGGCCATGGTGTCTAGGGCCCGGATTGATGCCAGAATTGACTTCGAGTCTGGACGGATTAAGAAGGAAGA GTTCTCTGAGATCATGAAGATCTGTTCCACCATTGAAGAACTCAGACGGCAAAAATAATGAATTTAGCTTGTCCTTCATG AAAAAATGCCTTGTTTCTACT >kraken:taxid|335341|NC_007371.1 Influenza A virus (A/New York/392/2004(H3N2)) segment 3, complete sequence AGCAAAAGCAGGTACTGATTCGAAATGGAAGATTTTGTGCGACAATGCTTCAACCCGATGATTGTCGAACTTGCAGAAAA AGCAATGAAAGAGTATGGAGAGGATCTGAAAATTGAAACAAACAAATTTGCAGCAATATGCACCCACTTGGAGGTATGTT TCATGTATTCAGATTTTCATTTCATCAATGAACAAGGCGAATCAATAGTGGTAGAACTTGATGATCCAAATGCACTGTTA AAGCACAGATTTGAAATAATCGAGGGGAGAGACAGAACAATGGCCTGGACAGTAGTAAACAGTATCTGCAACACTACTGG AGCAGAAAAACCAAAGTTTCTACCAGATTTGTATGATTACAAGGAGAATAGATTCATCGAAATTGGAGTGACAAGAAGAG AAGTCCACATATATTACCTTGAAAAGGCCAATAAAATTAAATCTGAGAACACACACATTCACATCTTCTCATTCACTGGG GAGGAAATAGCCACAAAGGCAGACTACACTCTCGACGAGGAAAGCAGGGCTAGGATTAAAACCAGGCTATTTACCATAAG ACAAGAAATGGCCAACAGAGGCCTCTGGGATTCCTTTCGTCAGTCCGAAAGAGGCGAAGAAACAATTGAAGAAAAATTTG AAATCTCAGGAACTATGCGTAGGCTTGCCGACCAAAGTCTCCCACCGAAATTCTCCTGCCTTGAGAATTTTAGAGCCTAT GTGGATGGATTCGAACCGAACGGCTGCATTGAGGGCAAGCTTTCTCAAATGTCCAAAGAAGTGAATGCCAAAATTGAACC TTTTCTGAAGACAACACCAAGACCAATCAAACTTCCTAATGGACCTCCTTGTTATCAGCGGTCCAAATTCCTCCTGATGG ATGCTTTGAAATTGAGCATTGAAGACCCAAGTCATGAAGGAGAAGGGATTCCATTATATGATGCGATCAAGTGCATAAAA ACATTCTTTGGATGGAAAGAACCTTATATAGTCAAACCACACGAAAAGGGAATAAATTCAAATTACCTGCTGTCATGGAA GCAAGTATTGTCAGAATTGCAGGACATTGAAAATGAGGAGAAGATCCCAAGGACTAAAAACATGAAGAAAACGAGTCAAC TAAAGTGGGCTCTTGGTGAAAACATGGCACCAGAGAAAGTAGACTTTGACAACTGCAGAGACATAAGCGATTTGAAGCAA TATGATAGTGACGAACCTGAATTAAGGTCACTTTCAAGCTGGATACAGAATGAGTTCAACAAGGCCTGCGAGCTAACTGA TTCAATCTGGATAGAGCTCGATGAAATTGGAGAGGACGTAGCCCCAATTGAGTACATTGCAAGCATGAGGAGGAATTATT TCACAGCAGAGGTGTCCCATTGTAGAGCCACTGAGTACATAATGAAGGGGGTATACATTAATACTGCCCTGCTCAATGCA TCCTGTGCAGCAATGGACGATTTTCAACTAATTCCCATGATAAGCAAGTGCAGAACTAAAGAGGGAAGGCGAAAAACCAA TTTATATGGATTCATCATAAAGGGAAGATCTCATTTAAGGAATGACACAGATGTGGTAAACTTTGTGAGCATGGAGTTTT CTCTCACTGACCCGAGACTTGAGCCACATAAATGGGAGAAATACTGTGTCCTTGAGATAGGAGATATGTTACTAAGAAGT GCCATAGGCCAAATTTCAAGGCCTATGTTCTTGTATGTGAGGACAAACGGAACATCAAAGGTCAAAATGAAATGGGGAAT GGAGATGAGACGTTGCCTCCTTCAGTCACTCCAGCAGATCGAGAGCATGATTGAAGCCGAGTCCTCGATTAAAGAGAAAG ACATGACCAAAGAGTTTTTTGAGAATAAATCAGAAGCATGGCCCATTGGGGAGTCCCCCAAGGGAGTGGAAGAAGGTTCC ATTGGGAAAGTCTGTAGGACTCTATTGGCTAAGTCAGTGTTCAATAGCCTGTATGCATCACCACAATTGGAAGGATTTTC AGCGGAGTCAAGAAAACTGCTTCTTGTTGTTCAGGCTCTTAGGGACAACCTCGAACCTGGGACCTTTGATCTCGGGGGGC TATATGAAGCAATTGAGGAGTGCCTGATTAATGATCCCTGGGTTTTGCTCAATGCATCTTGGTTCAACTCCTTCCTGACA CATGCATTAAAATAGTTATGGCAGTGCTACTATTTGTTATCCGTACTGTCCAAAAAAGTACCTTGTTTCTACT >kraken:taxid|335341|NC_007366.1 Influenza A virus (A/New York/392/2004(H3N2)) segment 4, complete sequence AGCAAAAGCAGGGGATAATTCTATTAACCATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAA AAACTTCCCGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAAC AATCACGAATGACCAAATTGAAGTCACTAATGCTACTGAACTGGTTCAGAGTTCCTCAACAGGTGGAATATGCGACAGTC CTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTCCAAAAT AAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCT TAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAATGGAA CAAGCTCTGCTTGCAAAAGGAGATCTAATAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAAATTCAAATAC CCAGCATTGAACGTGACTATGCCAAACAATGAAAAATTTGACAAACTGTACATTTGGGGGGTTCACCACCCGGGTACGGA CAATGACCAAATCAGCCTATATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAACCGTAATCC CGAGTATCGGATCTAGACCCAGGATAAGGGATGTCCCCAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGAC ATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTCGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAAT GAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTTC AAAATGTAAACAGGATCACATATGGGGCCTGTCCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGA AATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTAGA CGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAACAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCA ACCAAATCAATGGGAAGCTGAATAGGTTGATCGGGAAAACAAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAA GTAGAAGGGAGAATTCAGGACCTCGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCT TGTGGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAGAACAAAGAAGCAAC TGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGGTCAATC AGAAATGGAACTTATGACCATGATGTATACAGAGATGAAGCATTAAACAACCGGTTCCAGATCAAAGGTGTTGAGTTGAA GTCAGGATACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCA TCATGTGGGCCTGCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGAGTGCATTAATTAAAAACACCCTTGTTTCTA CT >kraken:taxid|335341|NC_007369.1 Influenza A virus (A/New York/392/2004(H3N2)) segment 5, complete sequence AGCAAAAGCAGGGTTAATAATCACTCACCGAGTGACATCAAAATCATGGCGTCCCAAGGCACCAAACGGTCTTATGAACA GATGGAAACTGATGGGGATCGCCAGAATGCAACTGAGATTAGGGCATCCGTCGGGAAGATGATTGATGGAATTGGGAGAT TCTACATCCAAATGTGCACTGAACTTAAACTCAGTGATCATGAAGGGCGGTTGATCCAGAACAGCTTGACAATAGAGAAA ATGGTGCTCTCTGCTTTTGATGAAAGAAGGAATAAATACCTGGAAGAACACCCCAGCGCGGGGAAAGATCCCAAGAAAAC TGGGGGGCCCATATACAGGAGAGTAGATGGAAAATGGATGAGGGAACTCGTCCTTTATGACAAAGAAGAGATAAGGCGAA TCTGGCGCCAAGCCAACAATGGTGAGGATGCGACAGCTGGTCTAACTCACATAATGATCTGGCATTCCAATTTGAATGAT GCAACATACCAGAGGACAAGAGCTCTTGTTCGAACTGGAATGGATCCCAGAATGTGCTCTCTGATGCAGGGCTCGACTCT CCCTAGAAGGTCCGGAGCTGCAGGTGCTGCAGTCAAAGGAATCGGGACAATGGTGATGGAACTGATCAGAATGGTCAAAC GGGGGATCAACGATCGAAATTTCTGGAGAGGTGAGAATGGGCGGAAAACAAGAAGTGCTTATGAGAGAATGTGCAACATT CTTAAAGGAAAATTTCAAACAGCTGCACAAAGAGCAATGGTGGATCAAGTGAGAGAAAGTCGGAACCCAGGAAATGCTGA GATCGAAGATCTCATATTTTTGGCAAGATCTGCATTGATATTGAGAGGGTCAGTTGCTCACAAATCTTGCCTACCTGCCT GTGCGTATGGACCTGCAGTATCCAGTGGGTACGACTTCGAAAAAGAGGGATATTCCTTGGTGGGAATAGACCCTTTCAAA CTACTTCAAAATAGCCAAATATACAGCCTAATCAGACCTAACGAGAATCCAGCACACAAGAGTCAGCTGGTGTGGATGGC ATGCCATTCTGCTGCATTTGAAGATTTAAGATTGTTAAGCTTCATCAGAGGGACAAAAGTATCTCCGCGGGGGAAACTGT CAACTAGAGGAGTACAAATTGCTTCAAATGAGAACATGGATAATATGGGATCGAGCACTCTTGAACTGAGAAGCGGGTAC TGGGCCATAAGGACCAGGAGTGGAGGAAACACTAATCAACAGAGGGCCTCCGCAGGCCAAACCAGTGTGCAACCTACGTT TTCTGTACAAAGAAACCTCCCATTTGAAAAGTCAACCATCATGGCAGCATTCACTGGAAATACGGAGGGAAGGACTTCAG ACATGAGGGCAGAAATCATAAGAATGATGGAAGGTGCAAAACCAGAAGAAGTGTCATTCCGGGGGAGGGGAGTTTTCGAG CTCTCAGACGAGAAGGCAACGAACCCGATCGTGCCCTCTTTTGATATGAGTAATGAAGGATCTTATTTCTTCGGAGACAA TGCAGAAGAGTACGACAATTAAGGAAAAAATACCCTTGTTTCTACT >kraken:taxid|335341|NC_007368.1 Influenza A virus (A/New York/392/2004(H3N2)) segment 6, complete sequence AGCAAAAGCAGGAGTAAAGATGAATCCAAATCAAAAGATAATAACGATTGGCTCTGTTTCTCTCACCATTTCCACAATAT GCTTCTTCATGCAAATTGCCATCCTGATAACCACTGTAACATTGCATTTCAAGCAATATGAATTCAACTCCCCCCCAAAC AACCAAGTGATGCTGTGTGAACCAACAATAATAGAAAGAAACATAACAGAGATAGTGTATCTGACCAACACCACCATAGA GAAGGAAATGTGCCCCAAACTAGCAGAATACAGAAATTGGTCAAAGCCGCAATGTGACATTACAGGATTTGCACCTTTTT CTAAGGACAATTCGATTAGGCTTTCCGCTGGTGGGGACATCTGGGTGACAAGAGAACCTTATGTGTCATGCGACCCTGAC AAGTGTTACCAATTTGCCCTTGGACAGGGAACAACACTAAACAACGTGCATTCAAATGACACAGTACATGATAGGACCCC TTATCGGACCCTATTGATGAATGAATTAGGTGTTCCATTTCATCTGGGGACCAAGCAAGTGTGCATAGCATGGTCCAGCT CAAGTTGTCACGATGGAAAAGCATGGCTGCATGTTTGTGTAACGGGGGATGATAAAAATGCAACTGCTAGCTTCATTTAC AATGGGAGGCTTGTAGATAGTATTGTTTCATGGTCCAAAAAAATCCTCAGGACCCAGGAGTCAGAATGCGTTTGTATCAA TGGAACTTGTACAGTAGTAATGACTGATGGGAGTGCTTCAGGAAAAGCTGATACTAAAATACTATTCATTGAGGAGGGGA AAATCATTCATACTAGCACATTGTCAGGAAGTGCTCAGCATGTCGAGGAGTGCTCCTGCTATCCTCGATATCCTGGTGTC AGATGTGTCTGCAGAGACAACTGGAAAGGCTCCAATAGGCCCATCGTAGATATAAACATAAAGGATTATAGCATTGTTTC CAGTTATGTGTGCTCAGGGCTTGTTGGAGACACACCCAGAAAAAACGACAGCTCCAGCAGTAGCCATTGCTTGGATCCTA ACAATGAAGAAGGTGGTCATGGAGTGAAAGGCTGGGCCTTTGATGATGGAAATGACGTGTGGATGGGAAGAACGATCAGC GAGAAGTTACGCTCAGGATATGAAACCTTCAAAGTCATTGAAGGCTGGTCCAAACCTAATTCCAAATTGCAGATAAATAG GCAAGTCATAGTTGACAGAGGTAATAGGTCCGGTTATTCTGGTATTTTCTCTGTTGAAGGCAAAAGCTGCATCAATCGGT GCTTTTATGTGGAGTTGATAAGGGGAAGAAAAGAGGAAACTGAAGTCTTGTGGACCTCAAACAGTATTGTTGTGTTTTGT GGCACCTCAGGTACATATGGAACAGGCTCATGGCCTGATGGGGCGGACATCAATCTCATGCCTATATAAGCTTTCGCAAT TTTAGAAAAAAACTCCTTGTTTCTACT >kraken:taxid|335341|NC_007367.1 Influenza A virus (A/New York/392/2004(H3N2)) segment 7, complete sequence AGCAAAAGCAGGTAGATATTGAAAGATGAGCCTTCTAACCGAGGTCGAAACGTATGTTCTCTCTATCGTTCCATCAGGCC CCCTCAAAGCCGAGATCGCGCAGAGACTTGAAGATGTCTTTGCTGGGAAAAACACAGATCTTGAGGCTCTCATGGAATGG CTAAAGACAAGACCAATTCTGTCACCTCTGACTAAGGGGATTTTGGGGTTTGTGTTCACGCTCACCGTGCCCAGTGAGCG AGGACTGCAGCGTAGACGCTTTGTCCAAAATGCCCTCAATGGGAATGGAGATCCAAATAACATGGACAAAGCAGTTAAAC TGTATAGGAAACTTAAGAGGGAGATAACGTTCCATGGGGCCAAAGAAATAGCTCTCAGTTATTCTGCTGGTGCACTTGCC AGTTGCATGGGCCTCATATACAATAGGATGGGGGCTGTAACCACTGAAGTGGCATTTGGCCTGGTATGTGCAACATGTGA ACAGATTGCTGACTCCCAGCACAGGTCTCATAGGCAAATGGTGGCAACAACCAATCCATTAATAAAACATGAGAACAGAA TGGTTTTGGCCAGCACTACAGCTAAGGCTATGGAGCAAATGGCTGGATCAAGTGAGCAGGCAGCGGAGGCCATGGAAATT GCTAGTCAGGCCAGGCAAATGGTGCAGGCAATGAGAGCCGTTGGGACTCATCCTAGCTCCAGTACTGGTCTAAGAGATGA TCTTCTTGAAAATTTGCAGACCTATCAGAAACGAATGGGGGTGCAGATGCAACGATTCAAGTGACCCGCTTGTTGTTGCC GCGAGTATCATTGGGATCTTGCACTTGATATTGTGGATTCTTGATCGTCTTTTTTTCAAATGCGTCTATCGACTCTTCAA ACACGGCCTTAAAAGAGGCCCTTCTACGGAAGGAGTACCTGAGTCTATGAGGGAAGAATATCGAAAGGAACAGCAGAATG CTGTGGATGCTGACGACAGTCATTTTGTCAGCATAGAGTTGGAGTAAAAAACTACCTTGTTTCTACT >kraken:taxid|335341|NC_007370.1 Influenza A virus (A/New York/392/2004(H3N2)) segment 8, complete sequence AGCAAAAGCAGGGTGACAAAGACATAATGGATTCCAACACTGTGTCAAGTTTCCAGGTAGATTGCTTTCTTTGGCATATC CGGAAACAAGTTGTAGACCAAGAACTGAGTGATGCCCCATTCCTTGATCGGCTTCGCCGAGATCAGAGGTCCCTAAGGGG AAGAGGCAATACTCTCGGTCTAGACATCAAAGCAGCCACCCATGTTGGAAAGCAAATTGTAGAAAAGATTCTGAAAGAAG AATCTGATGAGGCACTTAAAATGACCATGGTCTCCACACCTGCTTCGCGATACATAACTGACATGACTATTGAGGAATTG TCAAGAAACTGGTTCATGCTAATGCCCAAGCAGAAAGTGGAAGGACCTCTTTGCATCAGAATGGACCAGGCAATCATGGA GAAAAACATCATGTTGAAAGCGAATTTCAGTGTGATTTTTGACCGACTAGAGACCATAGTATTACTAAGGGCTTTCACCG AAGAGGGAGCAATTGTTGGCGAAATCTCACCATTGCCTTCTTTTCCAGGACATACTATTGAGGATGTCAAAAATGCAATT GGGGTCCTCATCGGAGGACTTGAATGGAATGATAACACAGTTCGAGTCTCTAAAAATCTACAGAGATTCGCTTGGAGAAG CAGTAATGAGAATGGGGGACCTCCACTTACTCCAAAACAGAAACGGAAAATGGCGAGAACAGCTAGGTCAAAAGTTTGAA GAGATAAGATGGCTGATTGAAGAAGTGAGACACAGACTAAAAACAACTGAAAATAGCTTTGAACAAATAACATTCATGCA AGCATTACAACTGCTGTTTGAAGTGGAACAGGAGATAAGAACTTTCTCATTTCAGCTTATTTAATGATAAAAAACACCCT TGTTTCTACT kraken2-2.1.2/data/FluB.fa000066400000000000000000000360031404614725400151330ustar00rootroot00000000000000>kraken:taxid|518987|NC_002204.1 Influenza B virus RNA 1, complete sequence AGCAGAAGCGGAGCTTTAAGATGAATATAAATCCATATTTTCTTTTCATAGATGTACCTATACAGGCAGCAATTTCAACA ACATTCCCATACACCGGTGTTCCCCCTTATTCTCATGGAACGGGAACAGGCTACACAATAGACACCGTGATTAGAACACA CGAGTACTCAAACAAGGGAAAACAATACATTTCTGATGTTACAGGATGTGTAATGGTAGATCCAACAAATGGGCCATTAC CCGAAGACAATGAACCGAGTGCCTATGCACAATTGGATTGTGTTCTGGAGGCTTTGGATAGAATGGATGAAGAACATCCA GGTCTGTTTCAAGCAGGGTCACAGAATGCCATGGAGGCACTAATGGTCACAACAGTGGACAAATTGACTCAGGGGAGACA GACCTTTGATTGGACGGTGTGTAGAAACCAACCTGCTGCAACGGCACTGAACACAACAATAACCTCTTTTAGGTTGAATG ATTTAAATGGAGCCGACAAGGGTGGATTAGTGCCCTTTTGCCAAGATATCATTGATTCATTAGACAAACCTGAAATGATT TTCTTCACAGTAAAGAATATAAAGAAAAAATTGCCTGCTAAAAACAGAAAGGGTTTCCTTATAAAAAGAATACCTATGAA GGTAAAAGACAGAATAACAAGAGTGGAATACATCAAAAGAGCATTATCATTAAACACAATGACTAAAGATGCTGAAAGAG GCAAACTAAAAAGAAGAGCAATTGCCACCGCTGGGATACAAATCAGAGGATTTGTATTAGTAGTTGAAAACTTGGCTAAA AATATCTGTGAAAATCTAGAGCAAAGTGGTTTACCCGTAGGTGGAAACGAAAAGAAGGCCAAACTATCAAATGCAGTGGC TAAAATGCTCAGTAATTGTCCACCAGGAGGGATCAGTATGACTGTGACAGGAGACAATACTAAATGGAATGAATGCTTAA ATCCAAGAATCTTTTTGGCTATGACTGAAAGAATAACCAGAGACAGCCCAATTTGGTTCCGGGATTTTTGTAGTATAGCA CCGGTCTTGTTCTCCAATAAAATAGCTAGATTGGGAAAAGGGTTCATGATAACAAGTAAAACAAAAAGACTAAAAGCTCA AATACCTTGTCCCGATCTGTTTAATATACCATTAGAAAGATATAATGAAGAAACAAGGGCAAAACTGAAAAAGCTAAAAC CTTTCTTCAATGAAGAAGGAACGGCATCTCTTTCGCCAGGAATGATGATGGGAATGTTTAATATGCTATCTACAGTATTA GGAGTAGCCGCACTAGGGATAAAAAACATTGGAAACAAAGAATACTTATGGGATGGACTGCAGTCTTCGGATGATTTTGC TCTGTTTGTTAATGCAAAAGATGAAGAGACATGTATGGAAGGAATAAACGATTTTTACCGAACATGTAAGCTATTGGGAA TAAACATGAGCAAAAAGAAAAGTTACTGTAATGAAACTGGGATGTTTGAATTTACCAGCATGTTTTACAGAGATGGATTT GTATCTAATTTTGCAATGGAACTCCCTTCATTTGGAGTCGCTGGAGTGAATGAATCAGCAGACATGGCAATAGGAATGAC AATAATAAAGAACAATATGATCAACAATGGGATGGGCCCAGCAACGGCACAAACAGCCATACAATTATTCATAGCTGACT ATAGATACACCTACAAATGCCACAGGGGAGATTCCAAAGTGGAAGGGAAGAGAATGAAAATTATAAAGGAGCTATGGGAA AACACTAAAGGAAGAGATGGTCTATTAGTAGCAGATGGTGGGCCTAATCTTTACAATTTGAGAAACCTGCATATTCCAGA AATAATATTAAAATACAACATAATGGACCCTGAGTACAAAGGACGGTTACTGCATCCTCAAAATCCCTTTGTAGGACATT TGTCTATTGAGGGTATCAAAGAAGCAGATATAACACCTGCACATGGCCCAATAAAGAAAATGGACTACGATGCGGTATCT GGAACTCATAGTTGGAGAACCAAAAGGAACAGATCTATACTAAACACTGATCAGAGGAACATGATTCTTGAGGAACAATG CTACGCTAAGTGTTGCAACCTTTTTGAGGCTTGCTTTAACAGTGCGTCATACAGGAAACCAGTAGGCCAGCACAGCATGC TTGAAGCTATGGCCCACAGATTAAGAATGGATGCACGACTGGACTATGAGTCAGGAAGGATGTCAAAAGAGGATTTCGAA AAAGCAATGGCTCACCTTGGTGAGATTGGGTACATGTAAGCTCCGGAAATGTCTATGGGGTTATTGGTCATCGTTGAATA CATGCGGTGCACAAATGATTAAAATGAAAAAAGGCTCGTGTTTCTACT >kraken:taxid|518987|NC_002205.1 Influenza B virus (B/Lee/1940) segment 2, complete sequence ATGACGTTGGCTAAAATTGAACTACTAAAGCAGCTGTTAAGGGACAATGAAGCCAAAACGGTGTTGAGACAGACAACGGT AGACCAATACAACATAATAAGAAAATTCAATACATCAAGAATTGAAAAGAACCCTTCATTAAGAATGAAGTGGGCCATGT GTTCCAATTTTCCCTTAGCTCTGACCAAGGGTGATATGGCAAATCGAATCCCCTTGGAATACAAGGGAATACAACTTAAA ACAAATGCTGAAGACATAGGAACTAAAGGACAAATGTGTTCAATAGCAGCAGTTACCTGGTGGAATACATATGGGCCCAT AGGGGATACTGAAGGGTTTGAAAAGGTCTACGAAAGCTTTTTTCTCAGAAAGATGAGACTTGACAATGCCACTTGGGGCC GAATAACCTTTGGCCCTGTTGAGAGAGTAAGAAAAAGAGTACTACTAAACCCGCTCACCAAGGAAATGCCCCCAGATGAA GCGAGCAATGTAATAATGGAAATATTATTCCCTAAAGAAGCAGGAATACCAAGAGAATCTACTTGGATACATAGAGAACT GATAAAAGAAAAAAGAGAAAAATTGAAGGGAACGATGATAACTCCCATTGTACTGGCATACATGCTTGAGAGAGAACTAG TTGCCCGAAGAAGGTTCCTGCCAGTAGCAGGAGCAACATCAGCAGAGTTCATAGAAATGCTACATTGCTTACAAGGTGAA AATTGGAGACAAATATATCATCCAGGAGGGAATAAACTAACTGAATCTAGATCTCAATCAATGATTGTAGCTTGCAGGAA GATAATCAGAAGATCAATAGTTGCATCAAACCCACTAGAGCTAGCTGTAGAGATTGCAAATAAGACTGTGATAGACACTG AACCTTTAAAATCATGTCTGGCAGCCCTGGATGGAGGTGATGTAGCCTGTGACATAATAAGAGCTGCATTAGGATTAAAA ATTAGACAAAGACAAAGATTTGGGAGACTTGAACTAAAGAGAATATCAGGAAGAGGATTCAAAAATGATGAAGAGATATT AATCGGAAACGGAACAATACAAAAGATTGGAATATGGGACGGAGAAGAGGAATTCCATGTAAGATGTGGCGAATGCAGGG GGATATTGAAAAAAAGCCAAATGAGAATGGAAAAACTACTGATAAATTCAGCCAAAAAGGAGGACATGAAAGATTTAATA ATCTTATGCATGGTATTTTCTCAAGACACTAGGATGTTCCAAGGAGTGAGAGGAGAGATAAATTTTCTTAATCGAGCAGG CCAACTTTTATCCCCCATGTACCAACTCCAACGATACTTTCTGAATAGGAGCAATGACCTTTTTGATCAATGGGGATATG AGGAATCACCTAAAGCAAGTGAGCTACATGGGATAAATGAATTAATGAATGCATCTGACTATACATTGAAAGGGGTTGTA GTAACAAAAAATGTGATTGATGATTTTAGTTCTACTGAAACAGAAAAAGTATCTATAACAAAAAATCTTAGTTTAATAAA AAGGACTGGGGAAGTTATAATGGGAGCCAATGACGTAAGTGAATTAGAATCACAAGCACAGCTAATGATAACGTATGATA CACCCAAGATGTGGGAAATGGGAACAACCAAAGAACTGGTACAAAACACTTACCAATGGGTGCTTAAAAATTTAGTAACA TTGAAGGCTCAGTTTCTTTTGGGAAAAGAAGACATGTTCCAATGGGATGCATTTGAAGCATTTGAAAGCATAATCCCTCA GAAGATGGCTGGTCAGTACAGTGGATTTGCAAGAGCAGTGCTCAAACAAATGAGAGACCAAGAGGTTATGAAAACTGACC AATTCATAAAATTGTTGCCTTTCTGTTTTTCGCCACCAAAATTAAGGAGCAATGGAGAGCCTTATCAATTTTTGAGGCTT ATGCTGAAAGGAGGAGGGGAAAATTTCATCGAAGTAAGGAAAGGGTCCCCCTTGTTCTCCTACAATCCACAAACGGAAAT CCTAACTATATGCGGCAGAATGATGTCATTAAAAGGAAAAATTGAGGATGAAGAAAGAAATAGATCAATGGGGAATGCAG TACTGGCAGGCTTTCTTGTTAGTGGCAAATATGACCCTGATCTTGGAGATTTCAAAACCATTGAGGAACTTGAAAGACTA AAACCGGGAGAAAAAGCCAACATCTTACTTTACCAAGGAAAGCCCGTTAAAGTAGTTAAAAGGAAAAGATATAGTGCTTT ATCCAATGATATTTCACAAGGGATTAAGAGACAAAGAATGACAGTTGAGTCCATGGGGTGGGCCTTGAGCTAA >kraken:taxid|518987|NC_002206.1 Influenza B virus (B/Lee/1940) segment 3, complete sequence ATGGATACTTTTATTACAAAGAATTTCCAGACTACAATAATACAAAAGGCCAAAAACACAATGGCAGAATTTAGTGAAGA TCCTGAATTACAGCCAGCAGTACTATTCAACATCTGCGTCCATCTGGAGGTCTGCTATGTAATAAGTGATATGAACTTTC TTGATGAGGAAGGAAAGACATATACAGCATTAGAAGGACAAGGAAAAGAGCAAAATTTGAGACCACAGTATGAAGTGATT GAGGGAATGCCAAGAAACATAGCATGGATGGTTCAAAGATCCTTAGCCCAAGAGCATGGAATAGAGACTCCAAGGTATCT GGCTGATTTATTTGATTATAAAACCAAGAGGTTTATCGAAGTCGGAATAACAAAGGGATTGGCTGATGATTACTTTTGGA AAAAGAAAGAAAAGTTGGGGAATAGCATGGAACTGATGATATTCAGCTACAATCAAGACTACTCGTTAAGTGATGAATCT TCATTGGATGAGGAAGGAAAAGGGAGAGTGCTAAGCAGACTCACAGAACTTCAGGCTGAGTTAAGTTTGAAAAACCTATG GCAAGTTCTAATAGGGGAAGAAGAAATTGAAAAAGGAATTGACTTCAAACTTGGACAAACAATATCTAAACTGAGGAATA TATCTGTTCCAGCTGGTTTCTCCAATTTTGAAGGGATGAGAAGTTACATAGACAACATAGACCCTAAAGGAGCAATAGAG AGAAATCTAGCAAGGATGTCTCCCTTAGTATCAGTTACACCCAAAAAGTTGAAATGGGAGGACCTGAGACCCATAGGGCC TCACATTTACAACCATGAGCTACCAGAAGTTCCATATAATGCCTTTCTCCTCATGTCTGATGAGTTGGGGCTGGCCAATA TGACTGAAGGAAAGTCCAAGAAACCGAAGACCTTAGCTAAGGAATGTCTAGAAAGGTATTCAACACTACGTGATCAAACT GACCCAATATTGATAATGAAAAGCGAAAAAGCTAACGAAAACTTCTTATGGAGGTTATGGAGGGACTGTGTAAATACAAT AAGCAATGAGGAAACAGGCAACGAATTACAGAAAACCAATTATGCCAAGTGGGCCACAGGAGATGGACTAACATACCAAA AAATAATGAAAGAAGTAGCAATAGATGACGAAACGATGTACCAAGAAGAACCCAAAATACCCAATAAATGTAGAGTGGCT GCTTGGGTTCAGGCAGAGATGAATCTACTGAGTACTCTGACAAGTAAAAGGGCCCTGGATCTGCCAGAAATAGGGCCAGA TGTAGCACCCGTGGAGCATGTAGGGAGTGAAAGAAGGAAATACTTTGTTAATGAAATCAACTACTGTAAAGCCTCTACAG TTATGATGAAGTATGTACTTTTTCACACTTCATTATTAAATGAAAGCAATGCTAGTATGGGAAAATATAAAGTAATACCA ATCACCAACAGAGTGGTAAATGAAAAAGGGGAAAGCTTTGACATGCTTTATGGTCTGGCGGTTAAGGGGCAATCTCATTT GCGGGGGGACACGGATGTTGTAACAGTTGTGACTTTCGAGTTTAGTAGTACAGATCCTAGAGTGGACTCAGGAAAGTGGC CAAAATATACTGTCTTTAAAATTGGCTCCCTATTTGTGAGTGGAAGAGAAAAACCTGTGTACCTATATTGCCGAGTGAAT GGTACAAACAAAATCCAAATGAAATGGGGAATGGAAGCTAGAAGATGTCTGCTTCAATCAATGCAACAAATGGAGGCAAT TGTTGATCAAGAATCATCGATACAAGGGTATGATATGACCAAAGCTTGTTTCAAGGGAGACAGAGTGAATAATCCCAAAA CTTTCAGTATTGGGACTCAGGAAGGCAAACTAGTAAAAGGGTCCTTTGGGAAAGCACTAAGAGTAATATTCACCAAATGT TTGATGCATTATGTATTTGGAAATGCTCAATTGGAGGGGTTTAGTGCCGAATCTAGGAGACTTCTACTGTTAATTCAGGC ATTAAAAGACAGGAAGGGCCCTTGGGTATTTGACTTGGAGGGAATGTACTTTGGAGTAGAGGAATGTATTAGTAACAATC CTTGGGTAATACAGAGTGCATACTGGTTTAATGAATGGTTGGGCATTGAAAAAGAAGGAAGTAAAGTGTTAGAATCAATA GATGAAATAATGGATGAATGAACGAAGGGCATAGCGCTCAATTT >kraken:taxid|518987|NC_002207.1 Influenza B virus (B/Lee/1940) segment 4, complete sequence AGCAGAAGCGTTGCATTTTCTAATATCCACAAAATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGA TCGAATCTGCACTGGGATAACATCGTCAAACTCACCTCATGTGGTTAAAACTGCCACTCAAGGGGAAGTCAATGTGACTG GTGTGATACCACTAACAACAACACCTACCAAATCTCATTTTGCAAATCTCAAAGGAACACAGACCAGAGGAAAACTATGC CCAAACTGTTTTAACTGCACAGATCTGGACGTGGCCCTAGGCAGACCAAAATGCATGGGGAACACACCCTCCGCAAAAGT CTCAATACTCCATGAAGTCAAACCTGCTACATCTGGATGCTTTCCTATAATGCACGACAGAACAAAAATCAGACAACTAC CTAATCTTCTCAGAGGATATGAAAACATCAGGTTATCAACCAGTAATGTTATCAATACAGAGACGGCACCAGGAGGACCC TACAAGGTGGGGACCTCAGGATCTTGCCCTAACGTTGCTAATGGGAACGGCTTCTTCAACACAATGGCTTGGGTTATCCC AAAAGACAACAACAAGACAGCAATAAATCCAGTAACAGTAGAAGTACCATACATTTGTTCAGAAGGGGAAGACCAAATTA CTGTTTGGGGGTTCCACTCTGATGACAAAACCCAAATGGAAAGACTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCA TCTGCCAATGGAGTAACCACACATTATGTTTCTCAGATTGGTGGCTTCCCAAATCAAACAGAAGACGAAGGGCTAAAACA AAGCGGCAGAATTGTTGTTGATTACATGGTACAAAAACCTGGAAAAACAGGAACAATTGTTTATCAAAGAGGCATTTTAT TGCCTCAAAAAGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGGTCCTTGCCTTTAATTGGTGAAGCAGATTGC CTCCACGAAAAGTACGGTGGATTAAATAAAAGCAAGCCTTACTACACAGGAGAGCATGCAAAGGCCATAGGAAATTGCCC AATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCGCCTGCAAAACTATTAAAGGAAAGAGGTT TCTTCGGAGCTATTGCTGGTTTCTTGGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCATGGA GCACATGGAGTGGCAGTGGCAGCAGACCTTAAGAGTACACAAGAAGCTATAAACAAGATAACAAAAAATCTCAACTATTT AAGTGAGCTAGAAGTAAAAAACCTTCAAAGACTAAGCGGAGCAATGAATGAGCTTCACGACGAAATACTCGAGCTAGACG AAAAAGTGGATGATCTAAGAGCTGATACAATAAGCTCACAAATAGAGCTTGCAGTCTTGCTTTCCAACGAAGGGATAATA AACAGTGAAGATGAGCATCTCTTGGCACTTGAAAGAAAACTGAAGAAAATGCTTGGCCCCTCTGCTGTAGAAATAGGGAA TGGGTGCTTTGAAACCAAACACAAATGCAACCAGACTTGCCTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGATT TTTCTCTTCCCACTTTTGATTCATTAAACATTACTGCTGCATCTTTAAATGATGATGGCTTGGATAATCATACTATACTG CTCTACTACTCAACTGCTGCTTCTAGCTTGGCTGTAACATTAATGATAGCTATCTTCATTGTCTACATGGTCTCCAGAGA CAATGTTTCTTGTTCCATCTGTCTGTGAGGGAGATTAAGCCCTGTGTTTTCCTTTACTGTAGTGCTCATTTGCTTGTCAC CATTACAAAGAAACGTTATTGAAAAATGCTCTTGTTACTACT >kraken:taxid|518987|NC_002208.1 Influenza B virus (B/Lee/1940) segment 5, complete sequence GGCAGAAGCACAGCATTTTCTTGTGAGCTTCGAGCACTAATAAAACTGAAAATCAAAATGTCCAACATGGATATTGACAG TATAAATACCGGAACAATCGATAAAACACCAGAAGAACTGACTCCCGGAACCAGTGGGGCAACCAGACCAATCATCAAGC CAGCAACCCTTGCTCCGCCAAGCAACAAACGAACCCGAAATCCATCTCCAGAAAGGACAACCACAAGCAGTGAAACCGAT ATCGGAAGGAAAATCCAAAAGAAACAAACCCCAACAGAGATAAAGAAGAGCGTCTACAAAATGGTGGTAAAACTGGGTGA ATTCTACAACCAGATGATGGTCAAAGCTGGACTTAATGATGACATGGAAAGGAATCTAATTCAAAATGCACAAGCTGTGG AGAGAATCCTATTGGCTGCAACTGATGACAAGAAAACTGAATACCAAAAGAAAAGGAATGCCAGAGATGTCAAAGAAGGG AAGGAAGAAATAGACCACAACAAGACAGGAGGCACCTTTTATAAGATGGTAAGAGATGATAAAACCATCTACTTCAGCCC TATAAAAATTACCTTTTTAAAAGAAGAGGTGAAAACAATGTACAAGACCACCATGGGGAGTGATGGTTTCAGTGGACTAA ATCACATTATGATTGGACATTCACAGATGAACGATGTCTGTTTCCAAAGATCAAAGGGACTGAAAAGGGTTGGACTTGAC CCTTCATTAATCAGTACTTTTGCCGGAAGCACACTACCCAGAAGATCAGGTACAACTGGTGTTGCAATCAAAGGAGGTGG AACTTTAGTGGATGAAGCCATCCGATTTATAGGAAGAGCAATGGCAGACAGAGGGCTACTGAGAGACATCAAGGCCAAGA CGGCCTATGAAAAGATTCTTCTGAATCTGAAAAACAAGTGCTCTGCGCCGCAACAAAAGGCTCTAGTTGATCAAGTGATC GGAAGTAGGAACCCAGGGATTGCAGACATAGAAGACCTAACTCTGCTTGCCAGAAGCATGGTAGTTGTCAGACCCTCTGT AGCGAGCAAAGTGGTGCTTCCCATAAGCATTTATGCTAAAATACCTCAACTAGGATTCAATACCGAAGAATACTCTATGG TTGGGTATGAAGCCATGGCTCTTTATAATATGGCAACACCTGTTTCCATATTAAGAATGGGAGATGACGCAAAAGATAAA TCTCAACTATTCTTCATGTCGTGCTTCGGAGCTGCCTATGAAGATCTAAGAGTGTTATCTGCACTAACGGGCACCGAATT TAAGCCTAGATCAGCACTAAAATGCAAGGGTTTCCATGTCCCGGCTAAGGAGCAAGTAGAAGGAATGGGGGCAGCTCTGA TGTCCATCAAGCTTCAGTTCTGGGCCCCAATGACCAGATCTGGAGGGAATGAAGTAAGTGGAGAAGGAGGGTCTGGTCAA ATAAGTTGCAGCCCTGTGTTTGCAGTAGAAAGACCTATTGCTCTAAGCAAGCAAGCTGTAAGAAGAATGCTGTCAATGAA CGTTGAAGGACGTGATGCAGATGTCAAAGGAAATCTACTCAAAATGATGAATGATTCAATGGCAAAGAAAACCAGTGGAA ATGCTTTCATTGGGAAGAAAATGTTTCAAATATCAGACAAAAACAAAGTCAATCCCATTGAGATTCCAATTAAGCAGACC ATCCCCAATTTCTTCTTTGGGAGGGACACAGCAGAGGATTATGATGACCTCGATTATTAAAGCAATAAAATAGACACTAT GGCTGTGACTGTTTCAGTACGTTTGGGATGTGGGTGTTTACTCTTATTGAAATAAATGTAAAAAATGCTGTTGTTTCTAC T >kraken:taxid|518987|NC_002209.1 Influenza B virus (B/Lee/1940) segment 6, complete sequence AGCAGAAGCAGAGCATATTCTTAGAACTGAAGTGAACAGGCCAAAAATGAACAATGCTACCTTCAACTGTACAAACATTA ACCCTATTACTCACATCAGGGGGAGTATTATTATCACTATATGTGTCAGCCTCATTGTCATACTTATTGTATTCGGATGT ATTGCTAAAATTTTCATCAACAAAAACAACTGCACCAACAATGTCATTAGAGTGCACAAACGCATCAAATGCCCAGACTG TGAACCATTCTGCAACAAAAGAGATGACATTTCCACCCCCAGAGCCGGAGTGGACATACCCTCGTTTATCTTGCCAGGGC TCAACCTTTCAGAAGGCACTCCTAATTAGCCCTCATAGGTTCGGAGAGATCAAAGGAAACTCAGCTCCCTTGATAATAAG AGAACCTTTTGTTGCTTGTGGACCAAAAGAATGCAGACACTTTGCTCTGACCCATTATGCAGCTCAGCCGGGGGGATACT ACAATGGAACAAGAAAGGACAGAAACAAGCTGAGGCATCTAGTATCAGTCAAATTGGGAAAAATCCCAACTGTGGAAAAC TCCATTTTCCACATGGCAGCTTGGAGCGGATCCGCATGCCATGATGGTAGAGAATGGACATATATCGGAGTTGATGGTCC TGACAATGATGCATTGGTCAAAATAAAATATGGAGAAGCATATACTGACACATATCATTCCTATGCACACAACATCCTAA GAACACAAGAAAGTGCCTGCAATTGCATCGGGGGAGATTGTTATCTTATGATAACAGACGGCTCAGCTTCAGGAATTAGT AAATGCAGATTTCTTAAAATTAGAGAGGGTCGAATAATAAAAGAAATACTTCCAACAGGAAGAGTGGAGCACACTGAAGA GTGCACATGCGGGTTCGCCAGCAATAAAACCATAGAATGTGCCTGTAGAGACAACAGTTACACAGCAAAAAGACCCTTTG TCAAATTAAATGTGGAAACTGATACAGCTGAAATAAGATTGATGTGCACAAAGACTTATCTAGACACTCCCAGACCGGAT GATGGAAGCATAGCAGGGCCTTGCGAATCTAATGGAGACAAGTGGCTTGGAGGCATCAAAGGAGGATTCGTCCATCAAAG AATGGCATCTAAGATTGGAAGATGGTACTCCCGAACGATGTCTAAAACTAACAGAATGGGGATGGAACTGTATGTAAAGT ATGATGGTGACCCATGGACTGACAGTGATGCTCTTACTCTTAGTGGAGTAATGGTTTCCATAGAAGAACCTGGTTGGTAT TCTTTTGGCTTCGAAATAAAGGACAAGAAATGTGATGTCCCTTGTATTGGGATAGAGATGGTACACGATGGTGGAAAAGA TACTTGGCATTCAGCTGCAACAGCCATTTACTGTTTGATGGGCTCAGGACAATTGCTATGGGACACTGTCACAGGCGTTG ATATGGCTTTATAATAGAGGAATGGTTGGATCTGTTCTAAACCCTTTGTTCCTATTTTATTTGAACAGTTGTTCTTACTA GATTTAATTGTTTCTGAAAAATGCTCTTGTTACTACT >kraken:taxid|518987|NC_002210.1 Influenza B virus (B/Lee/1940) segment 7, complete sequence AGCAGAAGCACGCACTTTCTTAAAATGTCGCTGTTTGGAGACACAATTGCCTACCTGCTTTCACTAATAGAAGATGGAGA AGGCAAAGCAGAACTAGCTGAAAAATTACACTGTTGGTTCGGTGGGAAAGAATTTGACCTAGATTCTGCTTTGGAATGGA TAAAAAACAAAAGGTGCCTAACTGATATACAAAAAGCACTAATTGGTGCCTCTATATGCTTTTTAAAACCCAAAGACCAA GAAAGAAAAAGGAGATTCATCACAGAGCCCCTGTCAGGAATGGGAACAACAGCAACAAAGAAGAAAGGCCTAATTCTAGC TGAGAGAAAAATGAGAAGATGTGTAAGCTTTCATGAAGCATTTGAAATAGCAGAAGGCCACGAAAGCTCAGCATTACTAT ATTGTCTTATGGTCATGTACCTAAACCCTGAAAACTATTCAATGCAAGTAAAACTAGGAACGCTCTGTGCTTTATGCGAG AAACAAGCATCGCACTCGCATAGAGCCCATAGCAGAGCAGCAAGGTCTTCGGTACCTGGAGTAAGACGAGAAATGCAGAT GGTTTCAGCTATGAACACAGCAAAGACAATGAATGGAATGGGAAAGGGAGAAGACGTCCAAAAACTAGCAGAAGAGCTGC AAAACAACATTGGAGTGTTGAGATCTCTAGGAGCAAGTCAAAAGAATGGAGAAGGAATTGCCAAAGATGTAATGGAAGTG CTAAAACAGAGCTCTATGGGAAATTCAGCTCTTGTGAGGAAATACTTATAATGCTCGAACCACTTCAGATTCTTTCAATT TGTTCTTTCATTTTATCAGCTCTCCATTTCATGGCTTGGACAATAGGGCATTTGAATCAAATAAAAAGAGGGGTAAACTT GAAAATACAAATAAGGAATCCAAATAAGGAGGCAATAAACAGAGAGGTGTCAATTCTGAGACACAATTACCAAAAGGAAA TCCAAGCCAAAGAAACAATGAAGAAAATACTCTCTGACAACATGGAAGTATTGGGTGACCACATAGTAGTTGAAGGGCTT TCAACTGATGAGATAATAAAAATGGGTGAAACAGTTTTGGAGGTGGAAGAATTGCAATGAGCCCAATTTTCACTGTATTT CTTACTATGCATTTAAGCAAATTGTAATCAATGTCAGTGAATAAAACTGGAAAAAGTGCGTTGTTTCTACT >kraken:taxid|518987|NC_002211.1 Influenza B virus (B/Lee/1940) segment 8, complete sequence CGCAGAAGCAGAGGATTTATTTAGTCACTGGCAAACGGAAAGATGGCGGACAACATGACCACAACACAAATTGAGGTGGG TCCGGGAGCAACCAATGCCACTATAAACTTTGAAGCAGGAATTCTGGAGTGCTATGAAAGGTTTTCATGGCAAAGAGCCC TTGACTATCCTGGTCAAGACCGCCTACACAGACTAAAACGAAAATTAGAATCAAGAATAAAGACTCACAACAAGAGTGAG CCTGAGAATAAAAGGATGTCTCTTGAAGAGAGAAAAGCAATTGGGGTAAAAATGATGAAAGTGCTTCTGTTTATGGATCC CTCTGCTGGAATTGAAGGGTTTGAGCCATACTGTGTGAAAAATCCCTCAACTAGCAAATGTCCAAATTACGATTGGACCG ATTACCCTCCAACCCCAGGAAAGTACCTTGATGACATAGAAGAAGAGCCGGAAAATGTCGATCACCCAATTGAGGTAGTA TTAAGGGACATGAACAATAAAGATGCACGACAAAAGATAAAGGATGAAGTAAACACTCAGAAAGAGGGGAAATTCCGTTT GACAATAAAAAGGGATATACGTAATGTGTTGTCCTTGAGAGTGTTGGTGAACGGAACCTTCCTCAAGCACCCTAATGGAG ACAAGTCCTTATCAACTCTTCATAGATTGAATGCATATGACCAGAATGGAGGGCTTGTTGCTAAACTTGTTGCTACTGAT GATCGGACAGTGGAGGATGAAAAAGATGGCCATCGGATCCTCAACTCACTCTTCGAGCGTTTTGATGAAGGACATTCAAA GCCAATTCGAGCAGCTGAAACTGCGGTGGGAGTCTTATCCCAATTTGGTCAAGAGCACCGATTATCACCAGAAGAGGGAG ACAATTAGACTGGCCACGGAAGAACTTTATCTCTTGAGTAAAAGAATTGATGATAGTATATTGTTCCACAAAACAGTAAT AGCTAACAGCTCCATAATAGCTGACATGATTGTATCATTATCATTACTGGAAACATTGTATGAAATGAAGGATGTGGTTG AAGTGTACAGCAGGCAGTGCTTATGAATGTAAAATAAAAATCCTCTTGTTACTACT kraken2-2.1.2/data/HIV_1.fa000066400000000000000000000222401404614725400151470ustar00rootroot00000000000000>kraken:taxid|11676|NC_001802.1 Human immunodeficiency virus 1, complete genome GGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGC TTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTC AGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAG GACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAG GCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAG GCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTG GCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTT AGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGA CAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCA GCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGG GTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACA AGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAG CTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGAC ATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTA TAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGAC CAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAAT TGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTAC ACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCC AAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGT GGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCA AATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTC TTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAG CAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAG ATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAG ATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTG GACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGT TGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAA ACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAA AAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTA GATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAA GAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTG CATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGA TCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCA ATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATC TGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCAT CCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGG GAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCAC TAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACAT GGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTA TCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAA CAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAG GAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTT AGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGG AGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAG AAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGC ATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGG AAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGA ATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAAT GGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAG CCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTA GCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTT AAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCG CCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAAT AAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCAT CCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATAC AAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGG AAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAG AAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATT AGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGA AAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTC TGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTA GACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATT AGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAG CATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAG AAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGA CATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGC CATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGA CAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTT GTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGC AGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAAGTAGT ACATGTAATGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCA TAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAA GACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGA TGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACC ACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCAC AGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGA TGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAG TGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTG CTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAA TAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTT GAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGG ACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCA GTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACA TCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATT TGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAAC AGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAA ATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAA TAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTA TAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACA GGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGA CAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAA GAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATG GGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAG GGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGG AAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGG AATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAA TTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAG ATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGA GGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTT TCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACA GATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGC TTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTG GAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTG AGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGC TTGGAAAGGATTTTGCTATAAGATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAA TGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGC AATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCA GGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGC TAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTAGCAGAAC TACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGAT AGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAG TGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGC TGACATCGAGCTTGCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCG AGCCCTCAGATCCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAG CTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTC kraken2-2.1.2/data/HIV_2.fa000066400000000000000000000245111404614725400151530ustar00rootroot00000000000000>kraken:taxid|11709|NC_001722.1 Human immunodeficiency virus 2, complete genome TGCAAGGGATGTTTTACAGTAGGAGGAGACATAGAATCCTAGACATATACCTAGAAAAAGAGGAAGGGATAATACCAGAT TGGCAGAATTATACTCATGGGCCAGGAGTAAGGTACCCAATGTACTTCGGGTGGCTGTGGAAGCTAGTATCAGTAGAACT CTCACAAGAGGCAGAGGAAGATGAGGCCAACTGCTTAGTACACCCAGCACAAACAAGCAGACATGATGATGAGCATGGGG AGACATTAGTGTGGCAGTTTGACTCCATGCTGGCCTATAACTACAAGGCCTTCACTCTGTACCCAGAAGAGTTTGGGCAC AAGTCAGGATTGCCAGAGAAAGAATGGAAGGCAAAACTGAAAGCAAGAGGGATACCATATAGTGAATAACAGGAACAACC ATACTTGGTCAAGGCAGGAAGTAGCTACTAAGAAACAGCTGAGGCTGCAGGGACTTTCCAGAAGGGGCTGTAACCAAGGG AGGGACATGGGAGGAGCTGGTGGGGAACGCCCTCATACTTACTGTATAAATGTACCCGCTTCTTGCATTGTATTCAGTCG CTCTGCGGAGAGGCTGGCAGATCGAGCCCTGAGAGGTTCTCTCCAGCACTAGCAGGTAGAGCCTGGGTGTTCCCTGCTGG ACTCTCACCAGTACTTGGCCGGTACTGGGCAGACGGCTCCACGCTTGCTTGCTTAAAGACCTCTTCAATAAAGCTGCCAG TTAGAAGCAAGTTAAGTGTGTGTTCCCATCTCTCCTAGTCGCCGCCTGGTCATTCGGTGTTCATCTGAGTAACAAGACCC TGGTCTGTTAGGACCCTTCTCGCTTTGGGAATCCAAGGCAGGAAAATCCCTAGCAGGTTGGCGCCCGAACAGGGACTTGA AGAGGACTGAGAAGCCCTGGAACTCGGCTGAGTGAAGGCAGTAAGGGCGGCAGGAACAAACCACGACGGAGTGCTCCTAG AAAGGCGCGGGCCGAGGTACCAAAGGCGGCGTGTGGAGCGGGAGTGAAAGAGGCCTCCGGGTGAAGGTAAGTACCTACAC CAAAAACTGTAGCCAGAAAAGGCTTGTTATCCTACCTTTAGACAGGTAGAAGATTGTGGGAGATGGGCGCGAGAAACTCC GTCTTGAGAGGGAAAAAAGCAGACGAATTAGAAAAAGTTAGGTTACGGCCCGGCGGAAAGAAAAAGTACAGGTTAAAACA TATTGTGTGGGCAGCGAATGAATTGGATAAATTCGGATTGGCAGAGAGCCTGTTGGAGTCAAAAGAAGGTTGCCAAAAGA TTCTCAGAGTTTTAGATCCATTAGTACCAACAGGGTCAGAAAATTTAAAAAGCCTTTTTAATACCGTCTGCGTCATTTGG TGCTTGCACGCAGAAGAGAAAGTGAAAGATACTGAGGAAGCAAAGAAACTAGCACAGAGACATCTAGTGGCAGAAACTGG AACTGCAGAGAAAATGCCAAATACAAGTAGACCAACAGCACCACCTAGTGGGAAAAGAGGAAACTACCCCGTGCAACAAG CGGGTGGCAACTATGTCCATGTGCCACTGAGCCCCCGAACTCTAAATGCATGGGTAAAATTAGTGGAGGAAAAGAAGTTC GGGGCAGAAGTAGTGCCAGGATTTCAGGCACTCTCAGAAGGCTGCACGCCCTATGATATTAATCAAATGCTTAATTGTGT GGGCGATCACCAAGCAGCTATGCAAATAATCAGAGAGATTATTAATGAAGAAGCAGCAGACTGGGATTCGCAGCACCCAA TACCAGGCCCCTTACCAGCAGGACAGCTCAGAGACCCAAGAGGGTCTGACATAGCAGGAACAACAAGCACAGTAGATGAA CAGATCCAGTGGATGTATAGGCCACAAAATCCCGTACCGGTAGGGAACATCTACAGAAGATGGATCCAAATAGGGCTGCA AAAGTGTGTCAGAAAGTACAACCCAACTAACATCTTAGACATAAAACAGGGACCAAAAGAACCGTTCCAAAGCTATGTAG ACAGGTTCTACAAAAGCTTGAGGGCAGAACAAACAGACCCAGCAGTAAAAAATTGGATGACCCAAACGCTGCTAATACAG AATGCCAACCCAGACTGCAAGTTAGTACTAAAAGGACTGGGGATGAATCCCACCCTAGAAGAGATGCTAACCGCCTGCCA GGGGGTAGGCGGACCAGGCCAGAAAGCCAGGCTAATGGCTGAAGCCCTAAAAGAGGCTATGGGACCAAGCCCTATCCCAT TTGCAGCAGCCCAACAAAGAAAGGCAATTAGGTATTGGAACTGTGGAAAGGAGGGACACTCGGCAAGACAGTGCCGAGCA CCTAGAAGACAGGGCTGCTGGAAGTGTGGCAAGCCAGGACACATCATGGCAAACTGCCCGGAAAGACAGGCAGGTTTTTT AGGGTTGGGCCCACGGGGAAAGAAGCCTCGCAACTTCCCCGTGACCCAAGCCCCTCAGGGGCTGATACCAACAGCACCTC CGGCAGATCCAGCAGCGGAACTGTTGGAGAGATATATGCAGCAAGGGAGAAAGCAGAGGGAGCAGAGGGAGAGACCATAC AAAGAGGTGACGGAGGACTTGCTGCACCTCGAGCAGAGAGAGACACCTCACAGAGAGGAGACAGAGGACTTGCTGCACCT CAATTCTCTCTTTGGAAAAGACCAGTAGTCACAGCGTACATCGAGGATCAGCCGGTAGAAGTCTTACTAGACACAGGGGC TGATGACTCAATAGTAGCAGGAATAGAATTAGGGGACAATTACACTCCAAAAATAGTAGGGGGAATAGGGGGATTTATAA ACACCAAAGAATACAAAAATGTAGAAATAAAAGTACTAAATAAAAGAGTAAGAGCCACCATAATGACAGGAGATACCCCA ATCAACATCTTTGGCAGAAATATTCTGACAGCCTTAGGCATGTCATTAAATTTACCAGTTGCCAAGATAGAGCCAATAAA AGTAACATTGAAGCCAGGGAAAGATGGACCAAGGCTGAAACAATGGCCCCTAACAAAAGAGAAAATAGAAGCACTAAAAG AGATCTGTGAAAAAATGGAAAAAGAGGGCCAGCTAGAAGAGGCACCTCCAACTAATCCTTATAATACCCCCACATTTGCA ATTAAGAAAAAGGACAAGAACAAATGGAGGATGCTGATAGATTTTAGAGAACTAAATAAGGTGACTCAAGATTTCACAGA AATTCAGCTAGGAATTCCACACCCGGCAGGACTAGCCAAAAAGAAAAGGATCTCTATATTAGATGTAGGGGATGCCTATT TTTCCATACCACTACATGAAGATTTTAGGCAGTATACTGCATTTACCCTACCAGCAGTAAACAATATGGAACCAGGAAAA AGATATATATATAAAGTCTTGCCACAAGGATGGAAGGGATCACCAGCAATTTTTCAATACACAATGAGGCAAGTCTTAGA ACCTTTCAGAAAAGCAAACCCAGATGTCATTCTCATCCAGTACATGGATGATATCTTAATAGCTAGTGACAGGACAGGTT TAGAGCATGACAAAGTGGTCCTGCAGCTAAAAGAACTTCTAAATGGCCTAGGGTTTTCTACTCCAGATGAGAAGTTCCAA AAAGACCCTCCATTTCAATGGATGGGCTGTGAACTATGGCCAACTAAATGGAAGCTGCAGAAACTACAACTGCCCCAGAA AGACATATGGACAGTCAATGACATCCAAAAGCTAGTGGGAGTCTTAAATTGGGCGGCACAAATCTATTCAGGAATAAAAA CCAAACACTTATGTAGACTAATTAGAGGAAAAATGACACTCACAGAAGAAGTGCAGTGGACAGAACTAGCAGAAGCAGAG CTAGAAGAAAACAAAATTATCTTGAGCCAGGAACAAGAAGGATATTATTACCAAGAAGAAAAAGAATTAGAGGCAACAAT CCAAAAAAGCCAAGGACATCAATGGACATACAAAATACACCAGGAAGAGAAAATCCTAAAAGTAGGAAAGTATGCAAAGA TAAAAAATACCCATACCAATGGGGTCAGATTACTAGCACAGGTAGTTCAGAAAATAGGAAAAGAGGCACTAGTCATTTGG GGACGGATACCAAAATTTCACCTGCCAGTGGAGAGAGAGACCTGGGAGCAGTGGTGGGATAACTACTGGCAAGTGACATG GATCCCAGAGTGGGACTTTGTATCTACCCCACCACTGGTCAGGTTAACATTTAACCTAGTAGGAGATCCTATACCAGGCG CAGAGACCTTCTACACAGATGGATCATGCAATAGACAGTCAAAAGAGGGAAAAGCAGGATATGTAACAGATAGAGGAAAA GACAAAGTAAAAGTATTAGAACAAACTACCAATCAGCAGGCAGAATTAGAAGTCTTTCGGATGGCACTGGCAGACTCAGG CCCAAAGGTTAATATCATAGTAGATTCACAGTATGTAATGGGGATAGTAGCAGGCCAGCCAACAGAGTCAGAAAATAGAA TAGTGAACCAGATCATAGAAGAAATGATAAAGAAGGAAGCAGTCTATGTTGCATGGGTCCCAGCCCATAAAGGCATAGGA GGAAACCAGGAAGTAGACCATTTAGTAAGTCAAGGCATCAGACAAGTATTATTCCTGGAAAAGATAGAGCCCGCTCAAGA GGAACATGAAAAATATCATAGCATTATAAAAGAACTAACCCATAAATTTGGAATACCCCTTCTAGTAGCAAGACAGATAG TAAACTCATGTGCCCAATGCCAACAGAAAGGAGAAGCCATACATGGGCAAGTAAATGCAGAAATAGGCGTTTGGCAAATG GACTACACACACTTAGAAGGAAAAATCATTATAGTAGCAGTACATGTTGCAAGTGGATTCATAGAAGCAGAAGTCATCCC ACAGGAATCAGGAAGGCAGACAGCACTCTTCCTATTAAAACTGGCCAGTAGGTGGCCAATAACGCACTTGCACACAGACA ATGGCCCCAACTTCACTTCACAGGAAGTGAAGATGGTGGCATGGTGGGTAGGTATAGAACAATCCTTTGGAGTACCTTAC AACCCACAAAGCCAGGGAGTAGTAGAAGCAATGAATCACCACCTAAAGAATCAGATAAGTAGAATTAGAGAACAGGCAAA TACAATAGAAACAATAGTACTGATGGCAGTTCATTGCATGAATTTTAAAAGAAGGGGAGGAATAGGGGATATGACCCCAG CAGAAAGACTAATCAACATGATTACCACAGAACAAGAAATACAATTCCTCCAAAGAAAAAATTCAAATTTTAAAAATTTC CAGGTCTATTACAGAGAAGGCAGAGATCAGCTGTGGAAAGGACCTGGTGAACTACTGTGGAAGGGAGAAGGAGCAGTCAT AGTCAAGGTAGGGACAGACATAAAAGTAGTACCAAGAAGGAAGGCCAAGATTATCAGGGACTATGGAGGAAGACAGGAAC TGGATAGTAGTCCCCACCTGGAGGGTGCCAGGGAGGATGGAGAAATGGCATGCCCTTGTCAAGTACCTGAAATACAGAAC AAAAGACCTAGAGGAGGTGCGCTATGTTCCCCACCACAAGGTGGGATGGGCATGGTGGACTTGCAGCAGGGTAATATTCC CACTACAAGGAAAAAGTCATCTAGAAATACAGGCATATTGGAACCTAACACCAGAAAAAGGATGGCTCTCCTCTCATGCA GTAAGATTAACCTGGTATACAGAAAAGTTCTGGACAGATGTTACCCCAGACTGTGCAGACATCCTAATACATAGCACTTA TTTCTCTTGCTTTACGGCAGGTGAAGTAAGAAGAGCCATCAGAGGGGAAAAGTTATTGTCCTGCTGCAACTATCCCCAAG CTCATAAAGCACAGGTACCATCACTTCAATACCTAGCCCTAGTAGTAGTACAACAAAATGACAGACCCCAGAGAAAGGGT ACCGCCAGGAAACAGTGGAGAAGAGACCATTGGAGAGGCCTTCGAGTGGCTAGAGAGGACCATAGAAGCCTTAAACAGGG AGGCAGTGAACCATCTGCCCCGAGAGCTCATTTTCCAGGTGTGGCAAAGGTCCTGGAGATATTGGCATGATGAACAAGGG ATGTCAGCAAGCTACACAAAGTATAGATATTTGTGCCTAATGCAAAAAGCTATATTTACACATTTCAAGAGAGGGTGCAC TTGCTGGGGGGAGGACATGGGCCGGGAAGGATTGGAAGACCAAGGACCTCCCCCTCCTCCCCCTCCAGGTCTAGTCTAAT GACTGAAGCACCAACAGAGTTTCCCCCAGAAGATGGGACCCCACGGAGGGACTTAGGGAGTGACTGGGTAATAGAAACTC TGAGGGAAATAAAGGAAGAAGCCTTAAGACATTTTGATCCCCGCTTGCTAATTGCTCTTGGCTACTATATCCATAATAGA CATGGAGACACCCTTGAAGGCGCCAGAGAGCTCATTAAAACCCTACAACGAGCCCTCTTCGTGCACTTCAGAGCGGGATG TAACCGCTCAAGAATTGGCTAAACAAGGAGAAGAACTCCTTGCCCAGCTGCACCGACCCCTAGAGCCATGCACTAACAAA TGCTATTGTAAGCGATGCAGTTTCCATTGCCAGCTGTGTTTCTCGAAAAAGGGGCTCGGAATATCATATGAGCGAAAGGG CAGACGAAGAAGGACTCCAAGGAAAACTAAGACTCCTTCGCCTTCTGCACCAGACAAGTGAGTATGGAGCCTGGTAGGAA TCAGCTGTTTGTTGTCATTTTACTAACAAGTGCTTGCTTAGTATATTGTAGCCAGTATGTGACTGTTTTCTATGGCATAC CCGCGTGGAAAAATGCATCTATTCCCTTATTTTGTGCAACTAAAAATAGAGACACTTGGGGGACCATACAGTGCTTGCCA GACAATGATGATTATCAGGAAATAATTTTAAATGTGACAGAGGCTTTTGATGCATGGAATAATACAGTGACAGAACAAGC AGTAGAAGATGTCTGGCATCTATTTGAGACATCAATAAAACCATGTGTCAAGCTAACACCTCTATGTGTGGCAATGAATT GTAGCAGGGTTCAAGGGAATACCACGACCCCGAATCCCAGGACCTCGAGTTCCACAACCTCGAGACCACCCACATCCGCA GCCTCCATAATAAATGAAACTTCTAACTGCATAGAAAACAACACATGCGCAGGATTAGGGTATGAGGAGATGATGCAATG TGAGTTCAATATGAAGGGGTTAGAACAAGATAAGAAAAGGAGGTATAAGGACACATGGTATTTAGAAGATGTGGTTTGTG ACAACACAACAGCTGGCACATGTTACATGAGACATTGCAACACATCAATCATCAAAGAGTCATGTGATAAGCACTATTGG GATGCTATGAGGTTTAGATACTGTGCACCACCGGGCTTTGCCCTATTAAGATGTAATGATACCAACTATTCAGGCTTTGA ACCTAAGTGCACTAAAGTAGTAGCTGCTTCATGCACAAGGATGATGGAAACGCAAACTTCTACTTGGTTTGGCTTTAATG GCACTAGAGCAGAAAATAGAACATATATCTATTGGCATGGCAGAGATAATAGGACTATCATTAGCTTAAACAAGTATTAT AATCTCACAATGCGTTGTAAGAGACCAGGAAATAAGACAGTTTTACCAATAACACTTATGTCAGGATTAGTGTTTCACTC TCAGCCAATCAACACAAGGCCTAGGCAGGCATGGTGCCGGTTTGGAGGCAGATGGAGGGAAGCCATGCAGGAGGTGAAGC AAACCCTTGTACAACATCCCAGATACAAAGGAATCAATGATACAGGGAAAATTAACTTTACGAAACCGGGAGCAGGCTCA GACCCGGAAGTGGCATTTATGTGGACTAACTGCAGAGGAGAATTTCTCTACTGTAACATGACTTGGTTCCTCAATTGGGT AGAAGACAAGAACCAAACACGGCGCAACTATTGCCATATAAAGCAGATAATTAATACCTGGCATAAAGTAGGGAAAAATG TATATTTGCCTCCTAGGGAAGGGGAGTTGGCCTGTGAATCAACAGTAACCAGCATAATTGCTAACATTGACATAGATAAA AATCGGACTCATACCAACATTACCTTTAGTGCAGAAGTGGCAGAACTGTACCGATTAGAACTGGGAGACTACAAATTAAT AGAAATAACACCAATTGGCTTCGCACCTACAGATCAGAGAAGGTACTCCTCAACTCCAGTGAGGAACAAAAGAGGTGTGT TCGTGCTAGGGTTCTTGGGTTTTCTCGCGACAGCAGGTTCTGCAATGGGCGCGCGGTCCCTGACGCTGTCAGCCCAGTCC CGGACTTTACTGGCCGGGATAGTGCAGCAACAGCAACAGCTGTTGGACGTAGTCAAGAGACAACAAGAAATGTTGCGACT GACCGTCTGGGGAACGAAAAACCTCCAGGCAAGAGTCACTGCTATCGAGAAGTACCTAAAGCATCAGGCACAGCTAAATT CATGGGGATGTGCGTTTAGACAGGTCTGCCACACTACTGTACCGTGGGTAAATGACTCTTTATCGCCTGACTGGAAAAAT ATGACATGGCAGGAGTGGGAGAAACAAGTCCGCTACCTAGAGGCAAATATCAGTCAAAGTTTAGAAGAAGCCCAAATTCA ACAAGAAAAGAATATGTATGAATTACAAAAATTAAATAGCTGGGATATTCTTGGCAACTGGTTTGACTTAACCTCCTGGG TCAAGTATATTCAATATGGAGTGCATATAGTAGTGGGAATAATAGCTTTAAGAATAGCAATCTATGTAGTGCAATTGTTA AGTAGATTTAGAAAGGGCTATAGGCCTGTTTTCTCTTCCCCCCCCGGTTATCTCCAACAGATCCATATCCACAAGGACCG GGGACAGCCAGCCAACGAAGGAACAGAAGAAGACGTCGGAGGCGACAGTGGTTACGACTTGTGGCCTTGGCCAATAAACT ATGTGCAGTTCCTGATCCACCTACTGACTCGCCTCTTGATCGGGCTATACAACATCTGCAGAGACTTACTATCCAAGAAC TCCCCGACCCGCCGACTGATCTCCCAGAGTCTAACAGCAATCAGGGACTGGCTGAGACTTAAGGCGGCCCAACTGCAATA TGGGTGCGAGTGGATCCAAGAAGCTTTCCAAGCATTCGCGAGGACTACGAGAGAGACTCTTGCGGGCGCGTGGGGATGGT TATGGGAAGCAGCGCGACGCATCGGGAGGGGAATACTCGCAGTTCCAAGAAGAATCAGGCAGGGAGCAGAACTCGCCCTC CTGTGAGGGACAGCAGTATCAGCAGGGAGAGTACATGAACAGCCCATGGAGAAACCCAGCAACAGAAAGACAGAAAGATT TGTATAGGCAGCAAAATATGGATGATGTAGATTCTGATGATGATGACCTAATAGGAGTTCCTGTTACACCAAGAGTACCA CGGAGAGAAATGACCTATAAATTGGCAATAGATATGTCACATTTTATAAAAGAAAAAGGGGGACTGCAAGGGATGTTTTA CAGTAGGAGGAGACATAGAATCCTAGACATATACCTAGAAAAAGAGGAAGGGATAATACCAGATTGGCAGAATTATACTC ATGGGCCAGGAGTAAGGTACCCAATGTACTTCGGGTGGCTGTGGAAGCTAGTATCAGTAGAACTCTCACAAGAGGCAGAG GAAGATGAGGCCAACTGCTTAGTACACCCAGCACAAACAAGCAGACATGATGATGAGCATGGGGAGACATTAGTGTGGCA GTTTGACTCCATGCTGGCCTATAACTACAAGGCCTTCACTCTGTACCCAGAAGAGTTTGGGCACAAGTCAGGATTGCCAG AGAAAGAATGGAAGGCAAAACTGAAAGCAAGAGGGATACCATATAGTGAATAACAGGAACAACCATACTTGGTCAAGGCA GGAAGTAGCTACTAAGAAACAGCTGAGGCTGCAGGGACTTTCCAGAAGGGGCTGTAACCAAGGGAGGGACATGGGAGGAG CTGGTGGGGAACGCCCTCATACTTACTGTATAAATGTACCCGCTTCTTGCATTGTATTCAGTCGCTCTGCGGAGAGGCTG GCAGATCGAGCCCTGAGAGGTTCTCTCCAGCACTAGCAGGTAGAGCCTGGGTGTTCCCTGCTGGACTCTCACCAGTACTT GGCCGGTACTGGGCAGACGGCTCCACGCTTGCTTGCTTAAAGACCTCTTCAATAAAGCTGCCAGTTAGAAGCAAGTTAAG TGTGTGTTCCCATCTCTCCTAGTCGCCGCCTGGTCATTCGGTGTTCATCTGAGTAACAAGACCCTGGTCTGTTAGGACCC TTCTCGCTTTGGGAATCCAAGGCAGGAAAATCCCTAGCA kraken2-2.1.2/data/Lambda.fa000066400000000000000000001400421404614725400154620ustar00rootroot00000000000000>kraken:taxid|10710|NC_001416.1 Enterobacteria phage lambda, complete genome GGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAAGGCGTTTCCGTTCTTCTTCGTCATAACTTA ATGTTTTTATTTAAAATACCCTCTGAAAAGAAAGGAAACGACAGGTGCTGAAAGCGAGGCTTTTTGGCCTCTGTCGTTTC CTTTCTCTGTTTTTGTCCGTGGAATGAACAATGGAAGTCAACAAAAAGCAGCTGGCTGACATTTTCGGTGCGAGTATCCG TACCATTCAGAACTGGCAGGAACAGGGAATGCCCGTTCTGCGAGGCGGTGGCAAGGGTAATGAGGTGCTTTATGACTCTG CCGCCGTCATAAAATGGTATGCCGAAAGGGATGCTGAAATTGAGAACGAAAAGCTGCGCCGGGAGGTTGAAGAACTGCGG CAGGCCAGCGAGGCAGATCTCCAGCCAGGAACTATTGAGTACGAACGCCATCGACTTACGCGTGCGCAGGCCGACGCACA GGAACTGAAGAATGCCAGAGACTCCGCTGAAGTGGTGGAAACCGCATTCTGTACTTTCGTGCTGTCGCGGATCGCAGGTG AAATTGCCAGTATTCTCGACGGGCTCCCCCTGTCGGTGCAGCGGCGTTTTCCGGAACTGGAAAACCGACATGTTGATTTC CTGAAACGGGATATCATCAAAGCCATGAACAAAGCAGCCGCGCTGGATGAACTGATACCGGGGTTGCTGAGTGAATATAT CGAACAGTCAGGTTAACAGGCTGCGGCATTTTGTCCGCGCCGGGCTTCGCTCACTGTTCAGGCCGGAGCCACAGACCGCC GTTGAATGGGCGGATGCTAATTACTATCTCCCGAAAGAATCCGCATACCAGGAAGGGCGCTGGGAAACACTGCCCTTTCA GCGGGCCATCATGAATGCGATGGGCAGCGACTACATCCGTGAGGTGAATGTGGTGAAGTCTGCCCGTGTCGGTTATTCCA AAATGCTGCTGGGTGTTTATGCCTACTTTATAGAGCATAAGCAGCGCAACACCCTTATCTGGTTGCCGACGGATGGTGAT GCCGAGAACTTTATGAAAACCCACGTTGAGCCGACTATTCGTGATATTCCGTCGCTGCTGGCGCTGGCCCCGTGGTATGG CAAAAAGCACCGGGATAACACGCTCACCATGAAGCGTTTCACTAATGGGCGTGGCTTCTGGTGCCTGGGCGGTAAAGCGG CAAAAAACTACCGTGAAAAGTCGGTGGATGTGGCGGGTTATGATGAACTTGCTGCTTTTGATGATGATATTGAACAGGAA GGCTCTCCGACGTTCCTGGGTGACAAGCGTATTGAAGGCTCGGTCTGGCCAAAGTCCATCCGTGGCTCCACGCCAAAAGT GAGAGGCACCTGTCAGATTGAGCGTGCAGCCAGTGAATCCCCGCATTTTATGCGTTTTCATGTTGCCTGCCCGCATTGCG GGGAGGAGCAGTATCTTAAATTTGGCGACAAAGAGACGCCGTTTGGCCTCAAATGGACGCCGGATGACCCCTCCAGCGTG TTTTATCTCTGCGAGCATAATGCCTGCGTCATCCGCCAGCAGGAGCTGGACTTTACTGATGCCCGTTATATCTGCGAAAA GACCGGGATCTGGACCCGTGATGGCATTCTCTGGTTTTCGTCATCCGGTGAAGAGATTGAGCCACCTGACAGTGTGACCT TTCACATCTGGACAGCGTACAGCCCGTTCACCACCTGGGTGCAGATTGTCAAAGACTGGATGAAAACGAAAGGGGATACG GGAAAACGTAAAACCTTCGTAAACACCACGCTCGGTGAGACGTGGGAGGCGAAAATTGGCGAACGTCCGGATGCTGAAGT GATGGCAGAGCGGAAAGAGCATTATTCAGCGCCCGTTCCTGACCGTGTGGCTTACCTGACCGCCGGTATCGACTCCCAGC TGGACCGCTACGAAATGCGCGTATGGGGATGGGGGCCGGGTGAGGAAAGCTGGCTGATTGACCGGCAGATTATTATGGGC CGCCACGACGATGAACAGACGCTGCTGCGTGTGGATGAGGCCATCAATAAAACCTATACCCGCCGGAATGGTGCAGAAAT GTCGATATCCCGTATCTGCTGGGATACTGGCGGGATTGACCCGACCATTGTGTATGAACGCTCGAAAAAACATGGGCTGT TCCGGGTGATCCCCATTAAAGGGGCATCCGTCTACGGAAAGCCGGTGGCCAGCATGCCACGTAAGCGAAACAAAAACGGG GTTTACCTTACCGAAATCGGTACGGATACCGCGAAAGAGCAGATTTATAACCGCTTCACACTGACGCCGGAAGGGGATGA ACCGCTTCCCGGTGCCGTTCACTTCCCGAATAACCCGGATATTTTTGATCTGACCGAAGCGCAGCAGCTGACTGCTGAAG AGCAGGTCGAAAAATGGGTGGATGGCAGGAAAAAAATACTGTGGGACAGCAAAAAGCGACGCAATGAGGCACTCGACTGC TTCGTTTATGCGCTGGCGGCGCTGCGCATCAGTATTTCCCGCTGGCAGCTGGATCTCAGTGCGCTGCTGGCGAGCCTGCA GGAAGAGGATGGTGCAGCAACCAACAAGAAAACACTGGCAGATTACGCCCGTGCCTTATCCGGAGAGGATGAATGACGCG ACAGGAAGAACTTGCCGCTGCCCGTGCGGCACTGCATGACCTGATGACAGGTAAACGGGTGGCAACAGTACAGAAAGACG GACGAAGGGTGGAGTTTACGGCCACTTCCGTGTCTGACCTGAAAAAATATATTGCAGAGCTGGAAGTGCAGACCGGCATG ACACAGCGACGCAGGGGACCTGCAGGATTTTATGTATGAAAACGCCCACCATTCCCACCCTTCTGGGGCCGGACGGCATG ACATCGCTGCGCGAATATGCCGGTTATCACGGCGGTGGCAGCGGATTTGGAGGGCAGTTGCGGTCGTGGAACCCACCGAG TGAAAGTGTGGATGCAGCCCTGTTGCCCAACTTTACCCGTGGCAATGCCCGCGCAGACGATCTGGTACGCAATAACGGCT ATGCCGCCAACGCCATCCAGCTGCATCAGGATCATATCGTCGGGTCTTTTTTCCGGCTCAGTCATCGCCCAAGCTGGCGC TATCTGGGCATCGGGGAGGAAGAAGCCCGTGCCTTTTCCCGCGAGGTTGAAGCGGCATGGAAAGAGTTTGCCGAGGATGA CTGCTGCTGCATTGACGTTGAGCGAAAACGCACGTTTACCATGATGATTCGGGAAGGTGTGGCCATGCACGCCTTTAACG GTGAACTGTTCGTTCAGGCCACCTGGGATACCAGTTCGTCGCGGCTTTTCCGGACACAGTTCCGGATGGTCAGCCCGAAG CGCATCAGCAACCCGAACAATACCGGCGACAGCCGGAACTGCCGTGCCGGTGTGCAGATTAATGACAGCGGTGCGGCGCT GGGATATTACGTCAGCGAGGACGGGTATCCTGGCTGGATGCCGCAGAAATGGACATGGATACCCCGTGAGTTACCCGGCG GGCGCGCCTCGTTCATTCACGTTTTTGAACCCGTGGAGGACGGGCAGACTCGCGGTGCAAATGTGTTTTACAGCGTGATG GAGCAGATGAAGATGCTCGACACGCTGCAGAACACGCAGCTGCAGAGCGCCATTGTGAAGGCGATGTATGCCGCCACCAT TGAGAGTGAGCTGGATACGCAGTCAGCGATGGATTTTATTCTGGGCGCGAACAGTCAGGAGCAGCGGGAAAGGCTGACCG GCTGGATTGGTGAAATTGCCGCGTATTACGCCGCAGCGCCGGTCCGGCTGGGAGGCGCAAAAGTACCGCACCTGATGCCG GGTGACTCACTGAACCTGCAGACGGCTCAGGATACGGATAACGGCTACTCCGTGTTTGAGCAGTCACTGCTGCGGTATAT CGCTGCCGGGCTGGGTGTCTCGTATGAGCAGCTTTCCCGGAATTACGCCCAGATGAGCTACTCCACGGCACGGGCCAGTG CGAACGAGTCGTGGGCGTACTTTATGGGGCGGCGAAAATTCGTCGCATCCCGTCAGGCGAGCCAGATGTTTCTGTGCTGG CTGGAAGAGGCCATCGTTCGCCGCGTGGTGACGTTACCTTCAAAAGCGCGCTTCAGTTTTCAGGAAGCCCGCAGTGCCTG GGGGAACTGCGACTGGATAGGCTCCGGTCGTATGGCCATCGATGGTCTGAAAGAAGTTCAGGAAGCGGTGATGCTGATAG AAGCCGGACTGAGTACCTACGAGAAAGAGTGCGCAAAACGCGGTGACGACTATCAGGAAATTTTTGCCCAGCAGGTCCGT GAAACGATGGAGCGCCGTGCAGCCGGTCTTAAACCGCCCGCCTGGGCGGCTGCAGCATTTGAATCCGGGCTGCGACAATC AACAGAGGAGGAGAAGAGTGACAGCAGAGCTGCGTAATCTCCCGCATATTGCCAGCATGGCCTTTAATGAGCCGCTGATG CTTGAACCCGCCTATGCGCGGGTTTTCTTTTGTGCGCTTGCAGGCCAGCTTGGGATCAGCAGCCTGACGGATGCGGTGTC CGGCGACAGCCTGACTGCCCAGGAGGCACTCGCGACGCTGGCATTATCCGGTGATGATGACGGACCACGACAGGCCCGCA GTTATCAGGTCATGAACGGCATCGCCGTGCTGCCGGTGTCCGGCACGCTGGTCAGCCGGACGCGGGCGCTGCAGCCGTAC TCGGGGATGACCGGTTACAACGGCATTATCGCCCGTCTGCAACAGGCTGCCAGCGATCCGATGGTGGACGGCATTCTGCT CGATATGGACACGCCCGGCGGGATGGTGGCGGGGGCATTTGACTGCGCTGACATCATCGCCCGTGTGCGTGACATAAAAC CGGTATGGGCGCTTGCCAACGACATGAACTGCAGTGCAGGTCAGTTGCTTGCCAGTGCCGCCTCCCGGCGTCTGGTCACG CAGACCGCCCGGACAGGCTCCATCGGCGTCATGATGGCTCACAGTAATTACGGTGCTGCGCTGGAGAAACAGGGTGTGGA AATCACGCTGATTTACAGCGGCAGCCATAAGGTGGATGGCAACCCCTACAGCCATCTTCCGGATGACGTCCGGGAGACAC TGCAGTCCCGGATGGACGCAACCCGCCAGATGTTTGCGCAGAAGGTGTCGGCATATACCGGCCTGTCCGTGCAGGTTGTG CTGGATACCGAGGCTGCAGTGTACAGCGGTCAGGAGGCCATTGATGCCGGACTGGCTGATGAACTTGTTAACAGCACCGA TGCGATCACCGTCATGCGTGATGCACTGGATGCACGTAAATCCCGTCTCTCAGGAGGGCGAATGACCAAAGAGACTCAAT CAACAACTGTTTCAGCCACTGCTTCGCAGGCTGACGTTACTGACGTGGTGCCAGCGACGGAGGGCGAGAACGCCAGCGCG GCGCAGCCGGACGTGAACGCGCAGATCACCGCAGCGGTTGCGGCAGAAAACAGCCGCATTATGGGGATCCTCAACTGTGA GGAGGCTCACGGACGCGAAGAACAGGCACGCGTGCTGGCAGAAACCCCCGGTATGACCGTGAAAACGGCCCGCCGCATTC TGGCCGCAGCACCACAGAGTGCACAGGCGCGCAGTGACACTGCGCTGGATCGTCTGATGCAGGGGGCACCGGCACCGCTG GCTGCAGGTAACCCGGCATCTGATGCCGTTAACGATTTGCTGAACACACCAGTGTAAGGGATGTTTATGACGAGCAAAGA AACCTTTACCCATTACCAGCCGCAGGGCAACAGTGACCCGGCTCATACCGCAACCGCGCCCGGCGGATTGAGTGCGAAAG CGCCTGCAATGACCCCGCTGATGCTGGACACCTCCAGCCGTAAGCTGGTTGCGTGGGATGGCACCACCGACGGTGCTGCC GTTGGCATTCTTGCGGTTGCTGCTGACCAGACCAGCACCACGCTGACGTTCTACAAGTCCGGCACGTTCCGTTATGAGGA TGTGCTCTGGCCGGAGGCTGCCAGCGACGAGACGAAAAAACGGACCGCGTTTGCCGGAACGGCAATCAGCATCGTTTAAC TTTACCCTTCATCACTAAAGGCCGCCTGTGCGGCTTTTTTTACGGGATTTTTTTATGTCGATGTACACAACCGCCCAACT GCTGGCGGCAAATGAGCAGAAATTTAAGTTTGATCCGCTGTTTCTGCGTCTCTTTTTCCGTGAGAGCTATCCCTTCACCA CGGAGAAAGTCTATCTCTCACAAATTCCGGGACTGGTAAACATGGCGCTGTACGTTTCGCCGATTGTTTCCGGTGAGGTT ATCCGTTCCCGTGGCGGCTCCACCTCTGAATTTACGCCGGGATATGTCAAGCCGAAGCATGAAGTGAATCCGCAGATGAC CCTGCGTCGCCTGCCGGATGAAGATCCGCAGAATCTGGCGGACCCGGCTTACCGCCGCCGTCGCATCATCATGCAGAACA TGCGTGACGAAGAGCTGGCCATTGCTCAGGTCGAAGAGATGCAGGCAGTTTCTGCCGTGCTTAAGGGCAAATACACCATG ACCGGTGAAGCCTTCGATCCGGTTGAGGTGGATATGGGCCGCAGTGAGGAGAATAACATCACGCAGTCCGGCGGCACGGA GTGGAGCAAGCGTGACAAGTCCACGTATGACCCGACCGACGATATCGAAGCCTACGCGCTGAACGCCAGCGGTGTGGTGA ATATCATCGTGTTCGATCCGAAAGGCTGGGCGCTGTTCCGTTCCTTCAAAGCCGTCAAGGAGAAGCTGGATACCCGTCGT GGCTCTAATTCCGAGCTGGAGACAGCGGTGAAAGACCTGGGCAAAGCGGTGTCCTATAAGGGGATGTATGGCGATGTGGC CATCGTCGTGTATTCCGGACAGTACGTGGAAAACGGCGTCAAAAAGAACTTCCTGCCGGACAACACGATGGTGCTGGGGA ACACTCAGGCACGCGGTCTGCGCACCTATGGCTGCATTCAGGATGCGGACGCACAGCGCGAAGGCATTAACGCCTCTGCC CGTTACCCGAAAAACTGGGTGACCACCGGCGATCCGGCGCGTGAGTTCACCATGATTCAGTCAGCACCGCTGATGCTGCT GGCTGACCCTGATGAGTTCGTGTCCGTACAACTGGCGTAATCATGGCCCTTCGGGGCCATTGTTTCTCTGTGGAGGAGTC CATGACGAAAGATGAACTGATTGCCCGTCTCCGCTCGCTGGGTGAACAACTGAACCGTGATGTCAGCCTGACGGGGACGA AAGAAGAACTGGCGCTCCGTGTGGCAGAGCTGAAAGAGGAGCTTGATGACACGGATGAAACTGCCGGTCAGGACACCCCT CTCAGCCGGGAAAATGTGCTGACCGGACATGAAAATGAGGTGGGATCAGCGCAGCCGGATACCGTGATTCTGGATACGTC TGAACTGGTCACGGTCGTGGCACTGGTGAAGCTGCATACTGATGCACTTCACGCCACGCGGGATGAACCTGTGGCATTTG TGCTGCCGGGAACGGCGTTTCGTGTCTCTGCCGGTGTGGCAGCCGAAATGACAGAGCGCGGCCTGGCCAGAATGCAATAA CGGGAGGCGCTGTGGCTGATTTCGATAACCTGTTCGATGCTGCCATTGCCCGCGCCGATGAAACGATACGCGGGTACATG GGAACGTCAGCCACCATTACATCCGGTGAGCAGTCAGGTGCGGTGATACGTGGTGTTTTTGATGACCCTGAAAATATCAG CTATGCCGGACAGGGCGTGCGCGTTGAAGGCTCCAGCCCGTCCCTGTTTGTCCGGACTGATGAGGTGCGGCAGCTGCGGC GTGGAGACACGCTGACCATCGGTGAGGAAAATTTCTGGGTAGATCGGGTTTCGCCGGATGATGGCGGAAGTTGTCATCTC TGGCTTGGACGGGGCGTACCGCCTGCCGTTAACCGTCGCCGCTGAAAGGGGGATGTATGGCCATAAAAGGTCTTGAGCAG GCCGTTGAAAACCTCAGCCGTATCAGCAAAACGGCGGTGCCTGGTGCCGCCGCAATGGCCATTAACCGCGTTGCTTCATC CGCGATATCGCAGTCGGCGTCACAGGTTGCCCGTGAGACAAAGGTACGCCGGAAACTGGTAAAGGAAAGGGCCAGGCTGA AAAGGGCCACGGTCAAAAATCCGCAGGCCAGAATCAAAGTTAACCGGGGGGATTTGCCCGTAATCAAGCTGGGTAATGCG CGGGTTGTCCTTTCGCGCCGCAGGCGTCGTAAAAAGGGGCAGCGTTCATCCCTGAAAGGTGGCGGCAGCGTGCTTGTGGT GGGTAACCGTCGTATTCCCGGCGCGTTTATTCAGCAACTGAAAAATGGCCGGTGGCATGTCATGCAGCGTGTGGCTGGGA AAAACCGTTACCCCATTGATGTGGTGAAAATCCCGATGGCGGTGCCGCTGACCACGGCGTTTAAACAAAATATTGAGCGG ATACGGCGTGAACGTCTTCCGAAAGAGCTGGGCTATGCGCTGCAGCATCAACTGAGGATGGTAATAAAGCGATGAAACAT ACTGAACTCCGTGCAGCCGTACTGGATGCACTGGAGAAGCATGACACCGGGGCGACGTTTTTTGATGGTCGCCCCGCTGT TTTTGATGAGGCGGATTTTCCGGCAGTTGCCGTTTATCTCACCGGCGCTGAATACACGGGCGAAGAGCTGGACAGCGATA CCTGGCAGGCGGAGCTGCATATCGAAGTTTTCCTGCCTGCTCAGGTGCCGGATTCAGAGCTGGATGCGTGGATGGAGTCC CGGATTTATCCGGTGATGAGCGATATCCCGGCACTGTCAGATTTGATCACCAGTATGGTGGCCAGCGGCTATGACTACCG GCGCGACGATGATGCGGGCTTGTGGAGTTCAGCCGATCTGACTTATGTCATTACCTATGAAATGTGAGGACGCTATGCCT GTACCAAATCCTACAATGCCGGTGAAAGGTGCCGGGACCACCCTGTGGGTTTATAAGGGGAGCGGTGACCCTTACGCGAA TCCGCTTTCAGACGTTGACTGGTCGCGTCTGGCAAAAGTTAAAGACCTGACGCCCGGCGAACTGACCGCTGAGTCCTATG ACGACAGCTATCTCGATGATGAAGATGCAGACTGGACTGCGACCGGGCAGGGGCAGAAATCTGCCGGAGATACCAGCTTC ACGCTGGCGTGGATGCCCGGAGAGCAGGGGCAGCAGGCGCTGCTGGCGTGGTTTAATGAAGGCGATACCCGTGCCTATAA AATCCGCTTCCCGAACGGCACGGTCGATGTGTTCCGTGGCTGGGTCAGCAGTATCGGTAAGGCGGTGACGGCGAAGGAAG TGATCACCCGCACGGTGAAAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGTAACAGCGGCAACC GGCATGACCGTGACGCCTGCCAGCACCTCGGTGGTGAAAGGGCAGAGCACCACGCTGACCGTGGCCTTCCAGCCGGAGGG CGTAACCGACAAGAGCTTTCGTGCGGTGTCTGCGGATAAAACAAAAGCCACCGTGTCGGTCAGTGGTATGACCATCACCG TGAACGGCGTTGCTGCAGGCAAGGTCAACATTCCGGTTGTATCCGGTAATGGTGAGTTTGCTGCGGTTGCAGAAATTACC GTCACCGCCAGTTAATCCGGAGAGTCAGCGATGTTCCTGAAAACCGAATCATTTGAACATAACGGTGTGACCGTCACGCT TTCTGAACTGTCAGCCCTGCAGCGCATTGAGCATCTCGCCCTGATGAAACGGCAGGCAGAACAGGCGGAGTCAGACAGCA ACCGGAAGTTTACTGTGGAAGACGCCATCAGAACCGGCGCGTTTCTGGTGGCGATGTCCCTGTGGCATAACCATCCGCAG AAGACGCAGATGCCGTCCATGAATGAAGCCGTTAAACAGATTGAGCAGGAAGTGCTTACCACCTGGCCCACGGAGGCAAT TTCTCATGCTGAAAACGTGGTGTACCGGCTGTCTGGTATGTATGAGTTTGTGGTGAATAATGCCCCTGAACAGACAGAGG ACGCCGGGCCCGCAGAGCCTGTTTCTGCGGGAAAGTGTTCGACGGTGAGCTGAGTTTTGCCCTGAAACTGGCGCGTGAGA TGGGGCGACCCGACTGGCGTGCCATGCTTGCCGGGATGTCATCCACGGAGTATGCCGACTGGCACCGCTTTTACAGTACC CATTATTTTCATGATGTTCTGCTGGATATGCACTTTTCCGGGCTGACGTACACCGTGCTCAGCCTGTTTTTCAGCGATCC GGATATGCATCCGCTGGATTTCAGTCTGCTGAACCGGCGCGAGGCTGACGAAGAGCCTGAAGATGATGTGCTGATGCAGA AAGCGGCAGGGCTTGCCGGAGGTGTCCGCTTTGGCCCGGACGGGAATGAAGTTATCCCCGCTTCCCCGGATGTGGCGGAC ATGACGGAGGATGACGTAATGCTGATGACAGTATCAGAAGGGATCGCAGGAGGAGTCCGGTATGGCTGAACCGGTAGGCG ATCTGGTCGTTGATTTGAGTCTGGATGCGGCCAGATTTGACGAGCAGATGGCCAGAGTCAGGCGTCATTTTTCTGGTACG GAAAGTGATGCGAAAAAAACAGCGGCAGTCGTTGAACAGTCGCTGAGCCGACAGGCGCTGGCTGCACAGAAAGCGGGGAT TTCCGTCGGGCAGTATAAAGCCGCCATGCGTATGCTGCCTGCACAGTTCACCGACGTGGCCACGCAGCTTGCAGGCGGGC AAAGTCCGTGGCTGATCCTGCTGCAACAGGGGGGGCAGGTGAAGGACTCCTTCGGCGGGATGATCCCCATGTTCAGGGGG CTTGCCGGTGCGATCACCCTGCCGATGGTGGGGGCCACCTCGCTGGCGGTGGCGACCGGTGCGCTGGCGTATGCCTGGTA TCAGGGCAACTCAACCCTGTCCGATTTCAACAAAACGCTGGTCCTTTCCGGCAATCAGGCGGGACTGACGGCAGATCGTA TGCTGGTCCTGTCCAGAGCCGGGCAGGCGGCAGGGCTGACGTTTAACCAGACCAGCGAGTCACTCAGCGCACTGGTTAAG GCGGGGGTAAGCGGTGAGGCTCAGATTGCGTCCATCAGCCAGAGTGTGGCGCGTTTCTCCTCTGCATCCGGCGTGGAGGT GGACAAGGTCGCTGAAGCCTTCGGGAAGCTGACCACAGACCCGACGTCGGGGCTGACGGCGATGGCTCGCCAGTTCCATA ACGTGTCGGCGGAGCAGATTGCGTATGTTGCTCAGTTGCAGCGTTCCGGCGATGAAGCCGGGGCATTGCAGGCGGCGAAC GAGGCCGCAACGAAAGGGTTTGATGACCAGACCCGCCGCCTGAAAGAGAACATGGGCACGCTGGAGACCTGGGCAGACAG GACTGCGCGGGCATTCAAATCCATGTGGGATGCGGTGCTGGATATTGGTCGTCCTGATACCGCGCAGGAGATGCTGATTA AGGCAGAGGCTGCGTATAAGAAAGCAGACGACATCTGGAATCTGCGCAAGGATGATTATTTTGTTAACGATGAAGCGCGG GCGCGTTACTGGGATGATCGTGAAAAGGCCCGTCTTGCGCTTGAAGCCGCCCGAAAGAAGGCTGAGCAGCAGACTCAACA GGACAAAAATGCGCAGCAGCAGAGCGATACCGAAGCGTCACGGCTGAAATATACCGAAGAGGCGCAGAAGGCTTACGAAC GGCTGCAGACGCCGCTGGAGAAATATACCGCCCGTCAGGAAGAACTGAACAAGGCACTGAAAGACGGGAAAATCCTGCAG GCGGATTACAACACGCTGATGGCGGCGGCGAAAAAGGATTATGAAGCGACGCTGAAAAAGCCGAAACAGTCCAGCGTGAA GGTGTCTGCGGGCGATCGTCAGGAAGACAGTGCTCATGCTGCCCTGCTGACGCTTCAGGCAGAACTCCGGACGCTGGAGA AGCATGCCGGAGCAAATGAGAAAATCAGCCAGCAGCGCCGGGATTTGTGGAAGGCGGAGAGTCAGTTCGCGGTACTGGAG GAGGCGGCGCAACGTCGCCAGCTGTCTGCACAGGAGAAATCCCTGCTGGCGCATAAAGATGAGACGCTGGAGTACAAACG CCAGCTGGCTGCACTTGGCGACAAGGTTACGTATCAGGAGCGCCTGAACGCGCTGGCGCAGCAGGCGGATAAATTCGCAC AGCAGCAACGGGCAAAACGGGCCGCCATTGATGCGAAAAGCCGGGGGCTGACTGACCGGCAGGCAGAACGGGAAGCCACG GAACAGCGCCTGAAGGAACAGTATGGCGATAATCCGCTGGCGCTGAATAACGTCATGTCAGAGCAGAAAAAGACCTGGGC GGCTGAAGACCAGCTTCGCGGGAACTGGATGGCAGGCCTGAAGTCCGGCTGGAGTGAGTGGGAAGAGAGCGCCACGGACA GTATGTCGCAGGTAAAAAGTGCAGCCACGCAGACCTTTGATGGTATTGCACAGAATATGGCGGCGATGCTGACCGGCAGT GAGCAGAACTGGCGCAGCTTCACCCGTTCCGTGCTGTCCATGATGACAGAAATTCTGCTTAAGCAGGCAATGGTGGGGAT TGTCGGGAGTATCGGCAGCGCCATTGGCGGGGCTGTTGGTGGCGGCGCATCCGCGTCAGGCGGTACAGCCATTCAGGCCG CTGCGGCGAAATTCCATTTTGCAACCGGAGGATTTACGGGAACCGGCGGCAAATATGAGCCAGCGGGGATTGTTCACCGT GGTGAGTTTGTCTTCACGAAGGAGGCAACCAGCCGGATTGGCGTGGGGAATCTTTACCGGCTGATGCGCGGCTATGCCAC CGGCGGTTATGTCGGTACACCGGGCAGCATGGCAGACAGCCGGTCGCAGGCGTCCGGGACGTTTGAGCAGAATAACCATG TGGTGATTAACAACGACGGCACGAACGGGCAGATAGGTCCGGCTGCTCTGAAGGCGGTGTATGACATGGCCCGCAAGGGT GCCCGTGATGAAATTCAGACACAGATGCGTGATGGTGGCCTGTTCTCCGGAGGTGGACGATGAAGACCTTCCGCTGGAAA GTGAAACCCGGTATGGATGTGGCTTCGGTCCCTTCTGTAAGAAAGGTGCGCTTTGGTGATGGCTATTCTCAGCGAGCGCC TGCCGGGCTGAATGCCAACCTGAAAACGTACAGCGTGACGCTTTCTGTCCCCCGTGAGGAGGCCACGGTACTGGAGTCGT TTCTGGAAGAGCACGGGGGCTGGAAATCCTTTCTGTGGACGCCGCCTTATGAGTGGCGGCAGATAAAGGTGACCTGCGCA AAATGGTCGTCGCGGGTCAGTATGCTGCGTGTTGAGTTCAGCGCAGAGTTTGAACAGGTGGTGAACTGATGCAGGATATC CGGCAGGAAACACTGAATGAATGCACCCGTGCGGAGCAGTCGGCCAGCGTGGTGCTCTGGGAAATCGACCTGACAGAGGT CGGTGGAGAACGTTATTTTTTCTGTAATGAGCAGAACGAAAAAGGTGAGCCGGTCACCTGGCAGGGGCGACAGTATCAGC CGTATCCCATTCAGGGGAGCGGTTTTGAACTGAATGGCAAAGGCACCAGTACGCGCCCCACGCTGACGGTTTCTAACCTG TACGGTATGGTCACCGGGATGGCGGAAGATATGCAGAGTCTGGTCGGCGGAACGGTGGTCCGGCGTAAGGTTTACGCCCG TTTTCTGGATGCGGTGAACTTCGTCAACGGAAACAGTTACGCCGATCCGGAGCAGGAGGTGATCAGCCGCTGGCGCATTG AGCAGTGCAGCGAACTGAGCGCGGTGAGTGCCTCCTTTGTACTGTCCACGCCGACGGAAACGGATGGCGCTGTTTTTCCG GGACGTATCATGCTGGCCAACACCTGCACCTGGACCTATCGCGGTGACGAGTGCGGTTATAGCGGTCCGGCTGTCGCGGA TGAATATGACCAGCCAACGTCCGATATCACGAAGGATAAATGCAGCAAATGCCTGAGCGGTTGTAAGTTCCGCAATAACG TCGGCAACTTTGGCGGCTTCCTTTCCATTAACAAACTTTCGCAGTAAATCCCATGACACAGACAGAATCAGCGATTCTGG CGCACGCCCGGCGATGTGCGCCAGCGGAGTCGTGCGGCTTCGTGGTAAGCACGCCGGAGGGGGAAAGATATTTCCCCTGC GTGAATATCTCCGGTGAGCCGGAGGCTATTTCCGTATGTCGCCGGAAGACTGGCTGCAGGCAGAAATGCAGGGTGAGATT GTGGCGCTGGTCCACAGCCACCCCGGTGGTCTGCCCTGGCTGAGTGAGGCCGACCGGCGGCTGCAGGTGCAGAGTGATTT GCCGTGGTGGCTGGTCTGCCGGGGGACGATTCATAAGTTCCGCTGTGTGCCGCATCTCACCGGGCGGCGCTTTGAGCACG GTGTGACGGACTGTTACACACTGTTCCGGGATGCTTATCATCTGGCGGGGATTGAGATGCCGGACTTTCATCGTGAGGAT GACTGGTGGCGTAACGGCCAGAATCTCTATCTGGATAATCTGGAGGCGACGGGGCTGTATCAGGTGCCGTTGTCAGCGGC ACAGCCGGGCGATGTGCTGCTGTGCTGTTTTGGTTCATCAGTGCCGAATCACGCCGCAATTTACTGCGGCGACGGCGAGC TGCTGCACCATATTCCTGAACAACTGAGCAAACGAGAGAGGTACACCGACAAATGGCAGCGACGCACACACTCCCTCTGG CGTCACCGGGCATGGCGCGCATCTGCCTTTACGGGGATTTACAACGATTTGGTCGCCGCATCGACCTTCGTGTGAAAACG GGGGCTGAAGCCATCCGGGCACTGGCCACACAGCTCCCGGCGTTTCGTCAGAAACTGAGCGACGGCTGGTATCAGGTACG GATTGCCGGGCGGGACGTCAGCACGTCCGGGTTAACGGCGCAGTTACATGAGACTCTGCCTGATGGCGCTGTAATTCATA TTGTTCCCAGAGTCGCCGGGGCCAAGTCAGGTGGCGTATTCCAGATTGTCCTGGGGGCTGCCGCCATTGCCGGATCATTC TTTACCGCCGGAGCCACCCTTGCAGCATGGGGGGCAGCCATTGGGGCCGGTGGTATGACCGGCATCCTGTTTTCTCTCGG TGCCAGTATGGTGCTCGGTGGTGTGGCGCAGATGCTGGCACCGAAAGCCAGAACTCCCCGTATACAGACAACGGATAACG GTAAGCAGAACACCTATTTCTCCTCACTGGATAACATGGTTGCCCAGGGCAATGTTCTGCCTGTTCTGTACGGGGAAATG CGCGTGGGGTCACGCGTGGTTTCTCAGGAGATCAGCACGGCAGACGAAGGGGACGGTGGTCAGGTTGTGGTGATTGGTCG CTGATGCAAAATGTTTTATGTGAAACCGCCTGCGGGCGGTTTTGTCATTTATGGAGCGTGAGGAATGGGTAAAGGAAGCA GTAAGGGGCATACCCCGCGCGAAGCGAAGGACAACCTGAAGTCCACGCAGTTGCTGAGTGTGATCGATGCCATCAGCGAA GGGCCGATTGAAGGTCCGGTGGATGGCTTAAAAAGCGTGCTGCTGAACAGTACGCCGGTGCTGGACACTGAGGGGAATAC CAACATATCCGGTGTCACGGTGGTGTTCCGGGCTGGTGAGCAGGAGCAGACTCCGCCGGAGGGATTTGAATCCTCCGGCT CCGAGACGGTGCTGGGTACGGAAGTGAAATATGACACGCCGATCACCCGCACCATTACGTCTGCAAACATCGACCGTCTG CGCTTTACCTTCGGTGTACAGGCACTGGTGGAAACCACCTCAAAGGGTGACAGGAATCCGTCGGAAGTCCGCCTGCTGGT TCAGATACAACGTAACGGTGGCTGGGTGACGGAAAAAGACATCACCATTAAGGGCAAAACCACCTCGCAGTATCTGGCCT CGGTGGTGATGGGTAACCTGCCGCCGCGCCCGTTTAATATCCGGATGCGCAGGATGACGCCGGACAGCACCACAGACCAG CTGCAGAACAAAACGCTCTGGTCGTCATACACTGAAATCATCGATGTGAAACAGTGCTACCCGAACACGGCACTGGTCGG CGTGCAGGTGGACTCGGAGCAGTTCGGCAGCCAGCAGGTGAGCCGTAATTATCATCTGCGCGGGCGTATTCTGCAGGTGC CGTCGAACTATAACCCGCAGACGCGGCAATACAGCGGTATCTGGGACGGAACGTTTAAACCGGCATACAGCAACAACATG GCCTGGTGTCTGTGGGATATGCTGACCCATCCGCGCTACGGCATGGGGAAACGTCTTGGTGCGGCGGATGTGGATAAATG GGCGCTGTATGTCATCGGCCAGTACTGCGACCAGTCAGTGCCGGACGGCTTTGGCGGCACGGAGCCGCGCATCACCTGTA ATGCGTACCTGACCACACAGCGTAAGGCGTGGGATGTGCTCAGCGATTTCTGCTCGGCGATGCGCTGTATGCCGGTATGG AACGGGCAGACGCTGACGTTCGTGCAGGACCGACCGTCGGATAAGACGTGGACCTATAACCGCAGTAATGTGGTGATGCC GGATGATGGCGCGCCGTTCCGCTACAGCTTCAGCGCCCTGAAGGACCGCCATAATGCCGTTGAGGTGAACTGGATTGACC CGAACAACGGCTGGGAGACGGCGACAGAGCTTGTTGAAGATACGCAGGCCATTGCCCGTTACGGTCGTAATGTTACGAAG ATGGATGCCTTTGGCTGTACCAGCCGGGGGCAGGCACACCGCGCCGGGCTGTGGCTGATTAAAACAGAACTGCTGGAAAC GCAGACCGTGGATTTCAGCGTCGGCGCAGAAGGGCTTCGCCATGTACCGGGCGATGTTATTGAAATCTGCGATGATGACT ATGCCGGTATCAGCACCGGTGGTCGTGTGCTGGCGGTGAACAGCCAGACCCGGACGCTGACGCTCGACCGTGAAATCACG CTGCCATCCTCCGGTACCGCGCTGATAAGCCTGGTTGACGGAAGTGGCAATCCGGTCAGCGTGGAGGTTCAGTCCGTCAC CGACGGCGTGAAGGTAAAAGTGAGCCGTGTTCCTGACGGTGTTGCTGAATACAGCGTATGGGAGCTGAAGCTGCCGACGC TGCGCCAGCGACTGTTCCGCTGCGTGAGTATCCGTGAGAACGACGACGGCACGTATGCCATCACCGCCGTGCAGCATGTG CCGGAAAAAGAGGCCATCGTGGATAACGGGGCGCACTTTGACGGCGAACAGAGTGGCACGGTGAATGGTGTCACGCCGCC AGCGGTGCAGCACCTGACCGCAGAAGTCACTGCAGACAGCGGGGAATATCAGGTGCTGGCGCGATGGGACACACCGAAGG TGGTGAAGGGCGTGAGTTTCCTGCTCCGTCTGACCGTAACAGCGGACGACGGCAGTGAGCGGCTGGTCAGCACGGCCCGG ACGACGGAAACCACATACCGCTTCACGCAACTGGCGCTGGGGAACTACAGGCTGACAGTCCGGGCGGTAAATGCGTGGGG GCAGCAGGGCGATCCGGCGTCGGTATCGTTCCGGATTGCCGCACCGGCAGCACCGTCGAGGATTGAGCTGACGCCGGGCT ATTTTCAGATAACCGCCACGCCGCATCTTGCCGTTTATGACCCGACGGTACAGTTTGAGTTCTGGTTCTCGGAAAAGCAG ATTGCGGATATCAGACAGGTTGAAACCAGCACGCGTTATCTTGGTACGGCGCTGTACTGGATAGCCGCCAGTATCAATAT CAAACCGGGCCATGATTATTACTTTTATATCCGCAGTGTGAACACCGTTGGCAAATCGGCATTCGTGGAGGCCGTCGGTC GGGCGAGCGATGATGCGGAAGGTTACCTGGATTTTTTCAAAGGCAAGATAACCGAATCCCATCTCGGCAAGGAGCTGCTG GAAAAAGTCGAGCTGACGGAGGATAACGCCAGCAGACTGGAGGAGTTTTCGAAAGAGTGGAAGGATGCCAGTGATAAGTG GAATGCCATGTGGGCTGTCAAAATTGAGCAGACCAAAGACGGCAAACATTATGTCGCGGGTATTGGCCTCAGCATGGAGG ACACGGAGGAAGGCAAACTGAGCCAGTTTCTGGTTGCCGCCAATCGTATCGCATTTATTGACCCGGCAAACGGGAATGAA ACGCCGATGTTTGTGGCGCAGGGCAACCAGATATTCATGAACGACGTGTTCCTGAAGCGCCTGACGGCCCCCACCATTAC CAGCGGCGGCAATCCTCCGGCCTTTTCCCTGACACCGGACGGAAAGCTGACCGCTAAAAATGCGGATATCAGTGGCAGTG TGAATGCGAACTCCGGGACGCTCAGTAATGTGACGATAGCTGAAAACTGTACGATAAACGGTACGCTGAGGGCGGAAAAA ATCGTCGGGGACATTGTAAAGGCGGCGAGCGCGGCTTTTCCGCGCCAGCGTGAAAGCAGTGTGGACTGGCCGTCAGGTAC CCGTACTGTCACCGTGACCGATGACCATCCTTTTGATCGCCAGATAGTGGTGCTTCCGCTGACGTTTCGCGGAAGTAAGC GTACTGTCAGCGGCAGGACAACGTATTCGATGTGTTATCTGAAAGTACTGATGAACGGTGCGGTGATTTATGATGGCGCG GCGAACGAGGCGGTACAGGTGTTCTCCCGTATTGTTGACATGCCAGCGGGTCGGGGAAACGTGATCCTGACGTTCACGCT TACGTCCACACGGCATTCGGCAGATATTCCGCCGTATACGTTTGCCAGCGATGTGCAGGTTATGGTGATTAAGAAACAGG CGCTGGGCATCAGCGTGGTCTGAGTGTGTTACAGAGGTTCGTCCGGGAACGGGCGTTTTATTATAAAACAGTGAGAGGTG AACGATGCGTAATGTGTGTATTGCCGTTGCTGTCTTTGCCGCACTTGCGGTGACAGTCACTCCGGCCCGTGCGGAAGGTG GACATGGTACGTTTACGGTGGGCTATTTTCAAGTGAAACCGGGTACATTGCCGTCGTTGTCGGGCGGGGATACCGGTGTG AGTCATCTGAAAGGGATTAACGTGAAGTACCGTTATGAGCTGACGGACAGTGTGGGGGTGATGGCTTCCCTGGGGTTCGC CGCGTCGAAAAAGAGCAGCACAGTGATGACCGGGGAGGATACGTTTCACTATGAGAGCCTGCGTGGACGTTATGTGAGCG TGATGGCCGGACCGGTTTTACAAATCAGTAAGCAGGTCAGTGCGTACGCCATGGCCGGAGTGGCTCACAGTCGGTGGTCC GGCAGTACAATGGATTACCGTAAGACGGAAATCACTCCCGGGTATATGAAAGAGACGACCACTGCCAGGGACGAAAGTGC AATGCGGCATACCTCAGTGGCGTGGAGTGCAGGTATACAGATTAATCCGGCAGCGTCCGTCGTTGTTGATATTGCTTATG AAGGCTCCGGCAGTGGCGACTGGCGTACTGACGGATTCATCGTTGGGGTCGGTTATAAATTCTGATTAGCCAGGTAACAC AGTGTTATGACAGCCCGCCGGAACCGGTGGGCTTTTTTGTGGGGTGAATATGGCAGTAAAGATTTCAGGAGTCCTGAAAG ACGGCACAGGAAAACCGGTACAGAACTGCACCATTCAGCTGAAAGCCAGACGTAACAGCACCACGGTGGTGGTGAACACG GTGGGCTCAGAGAATCCGGATGAAGCCGGGCGTTACAGCATGGATGTGGAGTACGGTCAGTACAGTGTCATCCTGCAGGT TGACGGTTTTCCACCATCGCACGCCGGGACCATCACCGTGTATGAAGATTCACAACCGGGGACGCTGAATGATTTTCTCT GTGCCATGACGGAGGATGATGCCCGGCCGGAGGTGCTGCGTCGTCTTGAACTGATGGTGGAAGAGGTGGCGCGTAACGCG TCCGTGGTGGCACAGAGTACGGCAGACGCGAAGAAATCAGCCGGCGATGCCAGTGCATCAGCTGCTCAGGTCGCGGCCCT TGTGACTGATGCAACTGACTCAGCACGCGCCGCCAGCACGTCCGCCGGACAGGCTGCATCGTCAGCTCAGGAAGCGTCCT CCGGCGCAGAAGCGGCATCAGCAAAGGCCACTGAAGCGGAAAAAAGTGCCGCAGCCGCAGAGTCCTCAAAAAACGCGGCG GCCACCAGTGCCGGTGCGGCGAAAACGTCAGAAACGAATGCTGCAGCGTCACAACAATCAGCCGCCACGTCTGCCTCCAC CGCGGCCACGAAAGCGTCAGAGGCCGCCACTTCAGCACGAGATGCGGTGGCCTCAAAAGAGGCAGCAAAATCATCAGAAA CGAACGCATCATCAAGTGCCGGTCGTGCAGCTTCCTCGGCAACGGCGGCAGAAAATTCTGCCAGGGCGGCAAAAACGTCC GAGACGAATGCCAGGTCATCTGAAACAGCAGCGGAACGGAGCGCCTCTGCCGCGGCAGACGCAAAAACAGCGGCGGCGGG GAGTGCGTCAACGGCATCCACGAAGGCGACAGAGGCTGCGGGAAGTGCGGTATCAGCATCGCAGAGCAAAAGTGCGGCAG AAGCGGCGGCAATACGTGCAAAAAATTCGGCAAAACGTGCAGAAGATATAGCTTCAGCTGTCGCGCTTGAGGATGCGGAC ACAACGAGAAAGGGGATAGTGCAGCTCAGCAGTGCAACCAACAGCACGTCTGAAACGCTTGCTGCAACGCCAAAGGCGGT TAAGGTGGTAATGGATGAAACGAACAGAAAAGCCCACTGGACAGTCCGGCACTGACCGGAACGCCAACAGCACCAACCGC GCTCAGGGGAACAAACAATACCCAGATTGCGAACACCGCTTTTGTACTGGCCGCGATTGCAGATGTTATCGACGCGTCAC CTGACGCACTGAATACGCTGAATGAACTGGCCGCAGCGCTCGGGAATGATCCAGATTTTGCTACCACCATGACTAACGCG CTTGCGGGTAAACAACCGAAGAATGCGACACTGACGGCGCTGGCAGGGCTTTCCACGGCGAAAAATAAATTACCGTATTT TGCGGAAAATGATGCCGCCAGCCTGACTGAACTGACTCAGGTTGGCAGGGATATTCTGGCAAAAAATTCCGTTGCAGATG TTCTTGAATACCTTGGGGCCGGTGAGAATTCGGCCTTTCCGGCAGGTGCGCCGATCCCGTGGCCATCAGATATCGTTCCG TCTGGCTACGTCCTGATGCAGGGGCAGGCGTTTGACAAATCAGCCTACCCAAAACTTGCTGTCGCGTATCCATCGGGTGT GCTTCCTGATATGCGAGGCTGGACAATCAAGGGGAAACCCGCCAGCGGTCGTGCTGTATTGTCTCAGGAACAGGATGGAA TTAAGTCGCACACCCACAGTGCCAGTGCATCCGGTACGGATTTGGGGACGAAAACCACATCGTCGTTTGATTACGGGACG AAAACAACAGGCAGTTTCGATTACGGCACCAAATCGACGAATAACACGGGGGCTCATGCTCACAGTCTGAGCGGTTCAAC AGGGGCCGCGGGTGCTCATGCCCACACAAGTGGTTTAAGGATGAACAGTTCTGGCTGGAGTCAGTATGGAACAGCAACCA TTACAGGAAGTTTATCCACAGTTAAAGGAACCAGCACACAGGGTATTGCTTATTTATCGAAAACGGACAGTCAGGGCAGC CACAGTCACTCATTGTCCGGTACAGCCGTGAGTGCCGGTGCACATGCGCATACAGTTGGTATTGGTGCGCACCAGCATCC GGTTGTTATCGGTGCTCATGCCCATTCTTTCAGTATTGGTTCACACGGACACACCATCACCGTTAACGCTGCGGGTAACG CGGAAAACACCGTCAAAAACATTGCATTTAACTATATTGTGAGGCTTGCATAATGGCATTCAGAATGAGTGAACAACCAC GGACCATAAAAATTTATAATCTGCTGGCCGGAACTAATGAATTTATTGGTGAAGGTGACGCATATATTCCGCCTCATACC GGTCTGCCTGCAAACAGTACCGATATTGCACCGCCAGATATTCCGGCTGGCTTTGTGGCTGTTTTCAACAGTGATGAGGC ATCGTGGCATCTCGTTGAAGACCATCGGGGTAAAACCGTCTATGACGTGGCTTCCGGCGACGCGTTATTTATTTCTGAAC TCGGTCCGTTACCGGAAAATTTTACCTGGTTATCGCCGGGAGGGGAATATCAGAAGTGGAACGGCACAGCCTGGGTGAAG GATACGGAAGCAGAAAAACTGTTCCGGATCCGGGAGGCGGAAGAAACAAAAAAAAGCCTGATGCAGGTAGCCAGTGAGCA TATTGCGCCGCTTCAGGATGCTGCAGATCTGGAAATTGCAACGAAGGAAGAAACCTCGTTGCTGGAAGCCTGGAAGAAGT ATCGGGTGTTGCTGAACCGTGTTGATACATCAACTGCACCTGATATTGAGTGGCCTGCTGTCCCTGTTATGGAGTAATCG TTTTGTGATATGCCGCAGAAACGTTGTATGAAATAACGTTCTGCGGTTAGTTAGTATATTGTAAAGCTGAGTATTGGTTT ATTTGGCGATTATTATCTTCAGGAGAATAATGGAAGTTCTATGACTCAATTGTTCATAGTGTTTACATCACCGCCAATTG CTTTTAAGACTGAACGCATGAAATATGGTTTTTCGTCATGTTTTGAGTCTGCTGTTGATATTTCTAAAGTCGGTTTTTTT TCTTCGTTTTCTCTAACTATTTTCCATGAAATACATTTTTGATTATTATTTGAATCAATTCCAATTACCTGAAGTCTTTC ATCTATAATTGGCATTGTATGTATTGGTTTATTGGAGTAGATGCTTGCTTTTCTGAGCCATAGCTCTGATATCCAAATGA AGCCATAGGCATTTGTTATTTTGGCTCTGTCAGCTGCATAACGCCAAAAAATATATTTATCTGCTTGATCTTCAAATGTT GTATTGATTAAATCAATTGGATGGAATTGTTTATCATAAAAAATTAATGTTTGAATGTGATAACCGTCCTTTAAAAAAGT CGTTTCTGCAAGCTTGGCTGTATAGTCAACTAACTCTTCTGTCGAAGTGATATTTTTAGGCTTATCTACCAGTTTTAGAC GCTCTTTAATATCTTCAGGAATTATTTTATTGTCATATTGTATCATGCTAAATGACAATTTGCTTATGGAGTAATCTTTT AATTTTAAATAAGTTATTCTCCTGGCTTCATCAAATAAAGAGTCGAATGATGTTGGCGAAATCACATCGTCACCCATTGG ATTGTTTATTTGTATGCCAAGAGAGTTACAGCAGTTATACATTCTGCCATAGATTATAGCTAAGGCATGTAATAATTCGT AATCTTTTAGCGTATTAGCGACCCATCGTCTTTCTGATTTAATAATAGATGATTCAGTTAAATATGAAGGTAATTTCTTT TGTGCAAGTCTGACTAACTTTTTTATACCAATGTTTAACATACTTTCATTTGTAATAAACTCAATGTCATTTTCTTCAAT GTAAGATGAAATAAGAGTAGCCTTTGCCTCGCTATACATTTCTAAATCGCCTTGTTTTTCTATCGTATTGCGAGAATTTT TAGCCCAAGCCATTAATGGATCATTTTTCCATTTTTCAATAACATTATTGTTATACCAAATGTCATATCCTATAATCTGG TTTTTGTTTTTTTGAATAATAAATGTTACTGTTCTTGCGGTTTGGAGGAATTGATTCAAATTCAAGCGAAATAATTCAGG GTCAAAATATGTATCAATGCAGCATTTGAGCAAGTGCGATAAATCTTTAAGTCTTCTTTCCCATGGTTTTTTAGTCATAA AACTCTCCATTTTGATAGGTTGCATGCTAGATGCTGATATATTTTAGAGGTGATAAAATTAACTGCTTAACTGTCAATGT AATACAAGTTGTTTGATCTTTGCAATGATTCTTATCAGAAACCATATAGTAAATTAGTTACACAGGAAATTTTTAATATT ATTATTATCATTCATTATGTATTAAAATTAGAGTTGTGGCTTGGCTCTGCTAACACGTTGCTCATAGGAGATATGGTAGA GCCGCAGACACGTCGTATGCAGGAACGTGCTGCGGCTGGCTGGTGAACTTCCGATAGTGCGGGTGTTGAATGATTTCCAG TTGCTACCGATTTTACATATTTTTTGCATGAGAGAATTTGTACCACCTCCCACCGACCATCTATGACTGTACGCCACTGT CCCTAGGACTGCTATGTGCCGGAGCGGACATTACAAACGTCCTTCTCGGTGCATGCCACTGTTGCCAATGACCTGCCTAG GAATTGGTTAGCAAGTTACTACCGGATTTTGTAAAAACAGCCCTCCTCATATAAAAAGTATTCGTTCACTTCCGATAAGC GTCGTAATTTTCTATCTTTCATCATATTCTAGATCCCTCTGAAAAAATCTTCCGAGTTTGCTAGGCACTGATACATAACT CTTTTCCAATAATTGGGGAAGTCATTCAAATCTATAATAGGTTTCAGATTTGCTTCAATAAATTCTGACTGTAGCTGCTG AAACGTTGCGGTTGAACTATATTTCCTTATAACTTTTACGAAAGAGTTTCTTTGAGTAATCACTTCACTCAAGTGCTTCC CTGCCTCCAAACGATACCTGTTAGCAATATTTAATAGCTTGAAATGATGAAGAGCTCTGTGTTTGTCTTCCTGCCTCCAG TTCGCCGGGCATTCAACATAAAAACTGATAGCACCCGGAGTTCCGGAAACGAAATTTGCATATACCCATTGCTCACGAAA AAAAATGTCCTTGTCGATATAGGGATGAATCGCTTGGTGTACCTCATCTACTGCGAAAACTTGACCTTTCTCTCCCATAT TGCAGTCGCGGCACGATGGAACTAAATTAATAGGCATCACCGAAAATTCAGGATAATGTGCAATAGGAAGAAAATGATCT ATATTTTTTGTCTGTCCTATATCACCACAAAATGGACATTTTTCACCTGATGAAACAAGCATGTCATCGTAATATGTTCT AGCGGGTTTGTTTTTATCTCGGAGATTATTTTCATAAAGCTTTTCTAATTTAACCTTTGTCAGGTTACCAACTACTAAGG TTGTAGGCTCAAGAGGGTGTGTCCTGTCGTAGGTAAATAACTGACCTGTCGAGCTTAATATTCTATATTGTTGTTCTTTC TGCAAAAAAGTGGGGAAGTGAGTAATGAAATTATTTCTAACATTTATCTGCATCATACCTTCCGAGCATTTATTAAGCAT TTCGCTATAAGTTCTCGCTGGAAGAGGTAGTTTTTTCATTGTACTTTACCTTCATCTCTGTTCATTATCATCGCTTTTAA AACGGTTCGACCTTCTAATCCTATCTGACCATTATAATTTTTTAGAATGGTTTCATAAGAAAGCTCTGAATCAACGGACT GCGATAATAAGTGGTGGTATCCAGAATTTGTCACTTCAAGTAAAAACACCTCACGAGTTAAAACACCTAAGTTCTCACCG AATGTCTCAATATCCGGACGGATAATATTTATTGCTTCTCTTGACCGTAGGACTTTCCACATGCAGGATTTTGGAACCTC TTGCAGTACTACTGGGGAATGAGTTGCAATTATTGCTACACCATTGCGTGCATCGAGTAAGTCGCTTAATGTTCGTAAAA AAGCAGAGAGCAAAGGTGGATGCAGATGAACCTCTGGTTCATCGAATAAAACTAATGACTTTTCGCCAACGACATCTACT AATCTTGTGATAGTAAATAAAACAATTGCATGTCCAGAGCTCATTCGAAGCAGATATTTCTGGATATTGTCATAAAACAA TTTAGTGAATTTATCATCGTCCACTTGAATCTGTGGTTCATTACGTCTTAACTCTTCATATTTAGAAATGAGGCTGATGA GTTCCATATTTGAAAAGTTTTCATCACTACTTAGTTTTTTGATAGCTTCAAGCCAGAGTTGTCTTTTTCTATCTACTCTC ATACAACCAATAAATGCTGAAATGAATTCTAAGCGGAGATCGCCTAGTGATTTTAAACTATTGCTGGCAGCATTCTTGAG TCCAATATAAAAGTATTGTGTACCTTTTGCTGGGTCAGGTTGTTCTTTAGGAGGAGTAAAAGGATCAAATGCACTAAACG AAACTGAAACAAGCGATCGAAAATATCCCTTTGGGATTCTTGACTCGATAAGTCTATTATTTTCAGAGAAAAAATATTCA TTGTTTTCTGGGTTGGTGATTGCACCAATCATTCCATTCAAAATTGTTGTTTTACCACACCCATTCCGCCCGATAAAAGC ATGAATGTTCGTGCTGGGCATAGAATTAACCGTCACCTCAAAAGGTATAGTTAAATCACTGAATCCGGGAGCACTTTTTC TATTAAATGAAAAGTGGAAATCTGACAATTCTGGCAAACCATTTAACACACGTGCGAACTGTCCATGAATTTCTGAAAGA GTTACCCCTCTAAGTAATGAGGTGTTAAGGACGCTTTCATTTTCAATGTCGGCTAATCGATTTGGCCATACTACTAAATC CTGAATAGCTTTAAGAAGGTTATGTTTAAAACCATCGCTTAATTTGCTGAGATTAACATAGTAGTCAATGCTTTCACCTA AGGAAAAAAACATTTCAGGGAGTTGACTGAATTTTTTATCTATTAATGAATAAGTGCTTACTTCTTCTTTTTGACCTACA AAACCAATTTTAACATTTCCGATATCGCATTTTTCACCATGCTCATCAAAGACAGTAAGATAAAACATTGTAACAAAGGA ATAGTCATTCCAACCATCTGCTCGTAGGAATGCCTTATTTTTTTCTACTGCAGGAATATACCCGCCTCTTTCAATAACAC TAAACTCCAACATATAGTAACCCTTAATTTTATTAAAATAACCGCAATTTATTTGGCGGCAACACAGGATCTCTCTTTTA AGTTACTCTCTATTACATACGTTTTCCATCTAAAAATTAGTAGTATTGAACTTAACGGGGCATCGTATTGTAGTTTTCCA TATTTAGCTTTCTGCTTCCTTTTGGATAACCCACTGTTATTCATGTTGCATGGTGCACTGTTTATACCAACGATATAGTC TATTAATGCATATATAGTATCGCCGAACGATTAGCTCTTCAGGCTTCTGAAGAAGCGTTTCAAGTACTAATAAGCCGATA GATAGCCACGGACTTCGTAGCCATTTTTCATAAGTGTTAACTTCCGCTCCTCGCTCATAACAGACATTCACTACAGTTAT GGCGGAAAGGTATGCATGCTGGGTGTGGGGAAGTCGTGAAAGAAAAGAAGTCAGCTGCGTCGTTTGACATCACTGCTATC TTCTTACTGGTTATGCAGGTCGTAGTGGGTGGCACACAAAGCTTTGCACTGGATTGCGAGGCTTTGTGCTTCTCTGGAGT GCGACAGGTTTGATGACAAAAAATTAGCGCAAGAAGACAAAAATCACCTTGCGCTAATGCTCTGTTACAGGTCACTAATA CCATCTAAGTAGTTGATTCATAGTGACTGCATATGTTGTGTTTTACAGTATTATGTAGTCTGTTTTTTATGCAAAATCTA ATTTAATATATTGATATTTATATCATTTTACGTTTCTCGTTCAGCTTTTTTATACTAAGTTGGCATTATAAAAAAGCATT GCTTATCAATTTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTTGATTTCAATTTTGTCCCACTCCC TGCCTCTGTCATCACGATACTGTGATGCCATGGTGTCCGACTTATGCCCGAGAAGATGTTGAGCAAACTTATCGCTTATC TGCTTCTCATAGAGTCTTGCAGACAAACTGCGCAACTCGTGAAAGGTAGGCGGATCCCCTTCGAAGGAAAGACCTGATGC TTTTCGTGCGCGCATAAAATACCTTGATACTGTGCCGGATGAAAGCGGTTCGCGACGAGTAGATGCAATTATGGTTTCTC CGCCAAGAATCTCTTTGCATTTATCAAGTGTTTCCTTCATTGATATTCCGAGAGCATCAATATGCAATGCTGTTGGGATG GCAATTTTTACGCCTGTTTTGCTTTGCTCGACATAAAGATATCCATCTACGATATCAGACCACTTCATTTCGCATAAATC ACCAACTCGTTGCCCGGTAACAACAGCCAGTTCCATTGCAAGTCTGAGCCAACATGGTGATGATTCTGCTGCTTGATAAA TTTTCAGGTATTCGTCAGCCGTAAGTCTTGATCTCCTTACCTCTGATTTTGCTGCGCGAGTGGCAGCGACATGGTTTGTT GTTATATGGCCTTCAGCTATTGCCTCTCGGAATGCATCGCTCAGTGTTGATCTGATTAACTTGGCTGACGCCGCCTTGCC CTCGTCTATGTATCCATTGAGCATTGCCGCAATTTCTTTTGTGGTGATGTCTTCAAGTGGAGCATCAGGCAGACCCCTCC TTATTGCTTTAATTTTGCTCATGTAATTTATGAGTGTCTTCTGCTTGATTCCTCTGCTGGCCAGGATTTTTTCGTAGCGA TCAAGCCATGAATGTAACGTAACGGAATTATCACTGTTGATTCTCGCTGTCAGAGGCTTGTGTTTGTGTCCTGAAAATAA CTCAATGTTGGCCTGTATAGCTTCAGTGATTGCGATTCGCCTGTCTCTGCCTAATCCAAACTCTTTACCCGTCCTTGGGT CCCTGTAGCAGTAATATCCATTGTTTCTTATATAAAGGTTAGGGGGTAAATCCCGGCGCTCATGACTTCGCCTTCTTCCC ATTTCTGATCCTCTTCAAAAGGCCACCTGTTACTGGTCGATTTAAGTCAACCTTTACCGCTGATTCGTGGAACAGATACT CTCTTCCATCCTTAACCGGAGGTGGGAATATCCTGCATTCCCGAACCCATCGACGAACTGTTTCAAGGCTTCTTGGACGT CGCTGGCGTGCGTTCCACTCCTGAAGTGTCAAGTACATCGCAAAGTCTCCGCAATTACACGCAAGAAAAAACCGCCATCA GGCGGCTTGGTGTTCTTTCAGTTCTTCAATTCGAATATTGGTTACGTCTGCATGTGCTATCTGCGCCCATATCATCCAGT GGTCGTAGCAGTCGTTGATGTTCTCCGCTTCGATAACTCTGTTGAATGGCTCTCCATTCCATTCTCCTGTGACTCGGAAG TGCATTTATCATCTCCATAAAACAAAACCCGCCGTAGCGAGTTCAGATAAAATAAATCCCCGCGAGTGCGAGGATTGTTA TGTAATATTGGGTTTAATCATCTATATGTTTTGTACAGAGAGGGCAAGTATCGTTTCCACCGTACTCGTGATAATAATTT TGCACGGTATCAGTCATTTCTCGCACATTGCAGAATGGGGATTTGTCTTCATTAGACTTATAAACCTTCATGGAATATTT GTATGCCGACTCTATATCTATACCTTCATCTACATAAACACCTTCGTGATGTCTGCATGGAGACAAGACACCGGATCTGC ACAACATTGATAACGCCCAATCTTTTTGCTCAGACTCTAACTCATTGATACTCATTTATAAACTCCTTGCAATGTATGTC GTTTCAGCTAAACGGTATCAGCAATGTTTATGTAAAGAAACAGTAAGATAATACTCAACCCGATGTTTGAGTACGGTCAT CATCTGACACTACAGACTCTGGCATCGCTGTGAAGACGACGCGAAATTCAGCATTTTCACAAGCGTTATCTTTTACAAAA CCGATCTCACTCTCCTTTGATGCGAATGCCAGCGTCAGACATCATATGCAGATACTCACCTGCATCCTGAACCCATTGAC CTCCAACCCCGTAATAGCGATGCGTAATGATGTCGATAGTTACTAACGGGTCTTGTTCGATTAACTGCCGCAGAAACTCT TCCAGGTCACCAGTGCAGTGCTTGATAACAGGAGTCTTCCCAGGATGGCGAACAACAAGAAACTGGTTTCCGTCTTCACG GACTTCGTTGCTTTCCAGTTTAGCAATACGCTTACTCCCATCCGAGATAACACCTTCGTAATACTCACGCTGCTCGTTGA GTTTTGATTTTGCTGTTTCAAGCTCAACACGCAGTTTCCCTACTGTTAGCGCAATATCCTCGTTCTCCTGGTCGCGGCGT TTGATGTATTGCTGGTTTCTTTCCCGTTCATCCAGCAGTTCCAGCACAATCGATGGTGTTACCAATTCATGGAAAAGGTC TGCGTCAAATCCCCAGTCGTCATGCATTGCCTGCTCTGCCGCTTCACGCAGTGCCTGAGAGTTAATTTCGCTCACTTCGA ACCTCTCTGTTTACTGATAAGTTCCAGATCCTCCTGGCAACTTGCACAAGTCCGACAACCCTGAACGACCAGGCGTCTTC GTTCATCTATCGGATCGCCACACTCACAACAATGAGTGGCAGATATAGCCTGGTGGTTCAGGCGGCGCATTTTTATTGCT GTGTTGCGCTGTAATTCTTCTATTTCTGATGCTGAATCAATGATGTCTGCCATCTTTCATTAATCCCTGAACTGTTGGTT AATACGCTTGAGGGTGAATGCGAATAATAAAAAAGGAGCCTGTAGCTCCCTGATGATTTTGCTTTTCATGTTCATCGTTC CTTAAAGACGCCGTTTAACATGCCGATTGCCAGGCTTAAATGAGTCGGTGTGAATCCCATCAGCGTTACCGTTTCGCGGT GCTTCTTCAGTACGCTACGGCAAATGTCATCGACGTTTTTATCCGGAAACTGCTGTCTGGCTTTTTTTGATTTCAGAATT AGCCTGACGGGCAATGCTGCGAAGGGCGTTTTCCTGCTGAGGTGTCATTGAACAAGTCCCATGTCGGCAAGCATAAGCAC ACAGAATATGAAGCCCGCTGCCAGAAAAATGCATTCCGTGGTTGTCATACCTGGTTTCTCTCATCTGCTTCTGCTTTCGC CACCATCATTTCCAGCTTTTGTGAAAGGGATGCGGCTAACGTATGAAATTCTTCGTCTGTTTCTACTGGTATTGGCACAA ACCTGATTCCAATTTGAGCAAGGCTATGTGCCATCTCGATACTCGTTCTTAACTCAACAGAAGATGCTTTGTGCATACAG CCCCTCGTTTATTATTTATCTCCTCAGCCAGCCGCTGTGCTTTCAGTGGATTTCGGATAACAGAAAGGCCGGGAAATACC CAGCCTCGCTTTGTAACGGAGTAGACGAAAGTGATTGCGCCTACCCGGATATTATCGTGAGGATGCGTCATCGCCATTGC TCCCCAAATACAAAACCAATTTCAGCCAGTGCCTCGTCCATTTTTTCGATGAACTCCGGCACGATCTCGTCAAAACTCGC CATGTACTTTTCATCCCGCTCAATCACGACATAATGCAGGCCTTCACGCTTCATACGCGGGTCATAGTTGGCAAAGTACC AGGCATTTTTTCGCGTCACCCACATGCTGTACTGCACCTGGGCCATGTAAGCTGACTTTATGGCCTCGAAACCACCGAGC CGGAACTTCATGAAATCCCGGGAGGTAAACGGGCATTTCAGTTCAAGGCCGTTGCCGTCACTGCATAAACCATCGGGAGA GCAGGCGGTACGCATACTTTCGTCGCGATAGATGATCGGGGATTCAGTAACATTCACGCCGGAAGTGAATTCAAACAGGG TTCTGGCGTCGTTCTCGTACTGTTTTCCCCAGGCCAGTGCTTTAGCGTTAACTTCCGGAGCCACACCGGTGCAAACCTCA GCAAGCAGGGTGTGGAAGTAGGACATTTTCATGTCAGGCCACTTCTTTCCGGAGCGGGGTTTTGCTATCACGTTGTGAAC TTCTGAAGCGGTGATGACGCCGAGCCGTAATTTGTGCCACGCATCATCCCCCTGTTCGACAGCTCTCACATCGATCCCGG TACGCTGCAGGATAATGTCCGGTGTCATGCTGCCACCTTCTGCTCTGCGGCTTTCTGTTTCAGGAATCCAAGAGCTTTTA CTGCTTCGGCCTGTGTCAGTTCTGACGATGCACGAATGTCGCGGCGAAATATCTGGGAACAGAGCGGCAATAAGTCGTCA TCCCATGTTTTATCCAGGGCGATCAGCAGAGTGTTAATCTCCTGCATGGTTTCATCGTTAACCGGAGTGATGTCGCGTTC CGGCTGACGTTCTGCAGTGTATGCAGTATTTTCGACAATGCGCTCGGCTTCATCCTTGTCATAGATACCAGCAAATCCGA AGGCCAGACGGGCACACTGAATCATGGCTTTATGACGTAACATCCGTTTGGGATGCGACTGCCACGGCCCCGTGATTTCT CTGCCTTCGCGAGTTTTGAATGGTTCGCGGCGGCATTCATCCATCCATTCGGTAACGCAGATCGGATGATTACGGTCCTT GCGGTAAATCCGGCATGTACAGGATTCATTGTCCTGCTCAAAGTCCATGCCATCAAACTGCTGGTTTTCATTGATGATGC GGGACCAGCCATCAACGCCCACCACCGGAACGATGCCATTCTGCTTATCAGGAAAGGCGTAAATTTCTTTCGTCCACGGA TTAAGGCCGTACTGGTTGGCAACGATCAGTAATGCGATGAACTGCGCATCGCTGGCATCACCTTTAAATGCCGTCTGGCG AAGAGTGGTGATCAGTTCCTGTGGGTCGACAGAATCCATGCCGACACGTTCAGCCAGCTTCCCAGCCAGCGTTGCGAGTG CAGTACTCATTCGTTTTATACCTCTGAATCAATATCAACCTGGTGGTGAGCAATGGTTTCAACCATGTACCGGATGTGTT CTGCCATGCGCTCCTGAAACTCAACATCGTCATCAAACGCACGGGTAATGGATTTTTTGCTGGCCCCGTGGCGTTGCAAA TGATCGATGCATAGCGATTCAAACAGGTGCTGGGGCAGGCCTTTTTCCATGTCGTCTGCCAGTTCTGCCTCTTTCTCTTC ACGGGCGAGCTGCTGGTAGTGACGCGCCCAGCTCTGAGCCTCAAGACGATCCTGAATGTAATAAGCGTTCATGGCTGAAC TCCTGAAATAGCTGTGAAAATATCGCCCGCGAAATGCCGGGCTGATTAGGAAAACAGGAAAGGGGGTTAGTGAATGCTTT TGCTTGATCTCAGTTTCAGTATTAATATCCATTTTTTATAAGCGTCGACGGCTTCACGAAACATCTTTTCATCGCCAATA AAAGTGGCGATAGTGAATTTAGTCTGGATAGCCATAAGTGTTTGATCCATTCTTTGGGACTCCTGGCTGATTAAGTATGT CGATAAGGCGTTTCCATCCGTCACGTAATTTACGGGTGATTCGTTCAAGTAAAGATTCGGAAGGGCAGCCAGCAACAGGC CACCCTGCAATGGCATATTGCATGGTGTGCTCCTTATTTATACATAACGAAAAACGCCTCGAGTGAAGCGTTATTGGTAT GCGGTAAAACCGCACTCAGGCGGCCTTGATAGTCATATCATCTGAATCAAATATTCCTGATGTATCGATATCGGTAATTC TTATTCCTTCGCTACCATCCATTGGAGGCCATCCTTCCTGACCATTTCCATCATTCCAGTCGAACTCACACACAACACCA TATGCATTTAAGTCGCTTGAAATTGCTATAAGCAGAGCATGTTGCGCCAGCATGATTAATACAGCATTTAATACAGAGCC GTGTTTATTGAGTCGGTATTCAGAGTCTGACCAGAAATTATTAATCTGGTGAAGTTTTTCCTCTGTCATTACGTCATGGT CGATTTCAATTTCTATTGATGCTTTCCAGTCGTAATCAATGATGTATTTTTTGATGTTTGACATCTGTTCATATCCTCAC AGATAAAAAATCGCCCTCACACTGGAGGGCAAAGAAGATTTCCAATAATCAGAACAAGTCGGCTCCTGTTTAGTTACGAG CGACATTGCTCCGTGTATTCACTCGTTGGAATGAATACACAGTGCAGTGTTTATTCTGTTATTTATGCCAAAAATAAAGG CCACTATCAGGCAGCTTTGTTGTTCTGTTTACCAAGTTCTCTGGCAATCATTGCCGTCGTTCGTATTGCCCATTTATCGA CATATTTCCCATCTTCCATTACAGGAAACATTTCTTCAGGCTTAACCATGCATTCCGATTGCAGCTTGCATCCATTGCAT CGCTTGAATTGTCCACACCATTGATTTTTATCAATAGTCGTAGTCATACGGATAGTCCTGGTATTGTTCCATCACATCCT GAGGATGCTCTTCGAACTCTTCAAATTCTTCTTCCATATATCACCTTAAATAGTGGATTGCGGTAGTAAAGATTGTGCCT GTCTTTTAACCACATCAGGCTCGGTGGTTCTCGTGTACCCCTACAGCGAGAAATCGGATAAACTATTACAACCCCTACAG TTTGATGAGTATAGAAATGGATCCACTCGTTATTCTCGGACGAGTGTTCAGTAATGAACCTCTGGAGAGAACCATGTATA TGATCGTTATCTGGGTTGGACTTCTGCTTTTAAGCCCAGATAACTGGCCTGAATATGTTAATGAGAGAATCGGTATTCCT CATGTGTGGCATGTTTTCGTCTTTGCTCTTGCATTTTCGCTAGCAATTAATGTGCATCGATTATCAGCTATTGCCAGCGC CAGATATAAGCGATTTAAGCTAAGAAAACGCATTAAGATGCAAAACGATAAAGTGCGATCAGTAATTCAAAACCTTACAG AAGAGCAATCTATGGTTTTGTGCGCAGCCCTTAATGAAGGCAGGAAGTATGTGGTTACATCAAAACAATTCCCATACATT AGTGAGTTGATTGAGCTTGGTGTGTTGAACAAAACTTTTTCCCGATGGAATGGAAAGCATATATTATTCCCTATTGAGGA TATTTACTGGACTGAATTAGTTGCCAGCTATGATCCATATAATATTGAGATAAAGCCAAGGCCAATATCTAAGTAACTAG ATAAGAGGAATCGATTTTCCCTTAATTTTCTGGCGTCCACTGCATGTTATGCCGCGTTCGCCAGGCTTGCTGTACCATGT GCGCTGATTCTTGCGCTCAATACGTTGCAGGTTGCTTTCAATCTGTTTGTGGTATTCAGCCAGCACTGTAAGGTCTATCG GATTTAGTGCGCTTTCTACTCGTGATTTCGGTTTGCGATTCAGCGAGAGAATAGGGCGGTTAACTGGTTTTGCGCTTACC CCAACCAACAGGGGATTTGCTGCTTTCCATTGAGCCTGTTTCTCTGCGCGACGTTCGCGGCGGCGTGTTTGTGCATCCAT CTGGATTCTCCTGTCAGTTAGCTTTGGTGGTGTGTGGCAGTTGTAGTCCTGAACGAAAACCCCCCGCGATTGGCACATTG GCAGCTAATCCGGAATCGCACTTACGGCCAATGCTTCGTTTCGTATCACACACCCCAAAGCCTTCTGCTTTGAATGCTGC CCTTCTTCAGGGCTTAATTTTTAAGAGCGTCACCTTCATGGTGGTCAGTGCGTCCTGCTGATGTGCTCAGTATCACCGCC AGTGGTATTTATGTCAACACCGCCAGAGATAATTTATCACCGCAGATGGTTATCTGTATGTTTTTTATATGAATTTATTT TTTGCAGGGGGGCATTGTTTGGTAGGTGAGAGATCTGAATTGCTATGTTTAGTGAGTTGTATCTATTTATTTTTCAATAA ATACAATTGGTTATGTGTTTTGGGGGCGATCGTGAGGCAAAGAAAACCCGGCGCTGAGGCCGGGTTATTCTTGTTCTCTG GTCAAATTATATAGTTGGAAAACAAGGATGCATATATGAATGAACGATGCAGAGGCAATGCCGATGGCGATAGTGGGTAT CATGTAGCCGCTTATGCTGGAAAGAAGCAATAACCCGCAGAAAAACAAAGCTCCAAGCTCAACAAAACTAAGGGCATAGA CAATAACTACCGATGTCATATACCCATACTCTCTAATCTTGGCCAGTCGGCGCGTTCTGCTTCCGATTAGAAACGTCAAG GCAGCAATCAGGATTGCAATCATGGTTCCTGCATATGATGACAATGTCGCCCCAAGACCATCTCTATGAGCTGAAAAAGA AACACCAGGAATGTAGTGGCGGAAAAGGAGATAGCAAATGCTTACGATAACGTAAGGAATTATTACTATGTAAACACCAG GCATGATTCTGTTCCGCATAATTACTCCTGATAATTAATCCTTAACTTTGCCCACCTGCCTTTTAAAACATTCCAGTATA TCACTTTTCATTCTTGCGTAGCAATATGCCATCTCTTCAGCTATCTCAGCATTGGTGACCTTGTTCAGAGGCGCTGAGAG ATGGCCTTTTTCTGATAGATAATGTTCTGTTAAAATATCTCCGGCCTCATCTTTTGCCCGCAGGCTAATGTCTGAAAATT GAGGTGACGGGTTAAAAATAATATCCTTGGCAACCTTTTTTATATCCCTTTTAAATTTTGGCTTAATGACTATATCCAAT GAGTCAAAAAGCTCCCCTTCAATATCTGTTGCCCCTAAGACCTTTAATATATCGCCAAATACAGGTAGCTTGGCTTCTAC CTTCACCGTTGTTCGGCCGATGAAATGCATATGCATAACATCGTCTTTGGTGGTTCCCCTCATCAGTGGCTCTATCTGAA CGCGCTCTCCACTGCTTAATGACATTCCTTTCCCGATTAAAAAATCTGTCAGATCGGATGTGGTCGGCCCGAAAACAGTT CTGGCAAAACCAATGGTGTCGCCTTCAACAAACAAAAAAGATGGGAATCCCAATGATTCGTCATCTGCGAGGCTGTTCTT AATATCTTCAACTGAAGCTTTAGAGCGATTTATCTTCTGAACCAGACTCTTGTCATTTGTTTTGGTAAAGAGAAAAGTTT TTCCATCGATTTTATGAATATACAAATAATTGGAGCCAACCTGCAGGTGATGATTATCAGCCAGCAGAGAATTAAGGAAA ACAGACAGGTTTATTGAGCGCTTATCTTTCCCTTTATTTTTGCTGCGGTAAGTCGCATAAAAACCATTCTTCATAATTCA ATCCATTTACTATGTTATGTTCTGAGGGGAGTGAAAATTCCCCTAATTCGATGAAGATTCTTGCTCAATTGTTATCAGCT ATGCGCCGACCAGAACACCTTGCCGATCAGCCAAACGTCTCTTCAGGCCACTGACTAGCGATAACTTTCCCCACAACGGA ACAACTCTCATTGCATGGGATCATTGGGTACTGTGGGTTTAGTGGTTGTAAAAACACCTGACCGCTATCCCTGATCAGTT TCTTGAAGGTAAACTCATCACCCCCAAGTCTGGCTATGCAGAAATCACCTGGCTCAACAGCCTGCTCAGGGTCAACGAGA ATTAACATTCCGTCAGGAAAGCTTGGCTTGGAGCCTGTTGGTGCGGTCATGGAATTACCTTCAACCTCAAGCCAGAATGC AGAATCACTGGCTTTTTTGGTTGTGCTTACCCATCTCTCCGCATCACCTTTGGTAAAGGTTCTAAGCTCAGGTGAGAACA TCCCTGCCTGAACATGAGAAAAAACAGGGTACTCATACTCACTTCTAAGTGACGGCTGCATACTAACCGCTTCATACATC TCGTAGATTTCTCTGGCGATTGAAGGGCTAAATTCTTCAACGCTAACTTTGAGAATTTTTGCAAGCAATGCGGCGTTATA AGCATTTAATGCATTGATGCCATTAAATAAAGCACCAACGCCTGACTGCCCCATCCCCATCTTGTCTGCGACAGATTCCT GGGATAAGCCAAGTTCATTTTTCTTTTTTTCATAAATTGCTTTAAGGCGACGTGCGTCCTCAAGCTGCTCTTGTGTTAAT GGTTTCTTTTTTGTGCTCATACGTTAAATCTATCACCGCAAGGGATAAATATCTAACACCGTGCGTGTTGACTATTTTAC CTCTGGCGGTGATAATGGTTGCATGTACTAAGGAGGTTGTATGGAACAACGCATAACCCTGAAAGATTATGCAATGCGCT TTGGGCAAACCAAGACAGCTAAAGATCTCGGCGTATATCAAAGCGCGATCAACAAGGCCATTCATGCAGGCCGAAAGATT TTTTTAACTATAAACGCTGATGGAAGCGTTTATGCGGAAGAGGTAAAGCCCTTCCCGAGTAACAAAAAAACAACAGCATA AATAACCCCGCTCTTACACATTCCAGCCCTGAAAAAGGGCATCAAATTAAACCACACCTATGGTGTATGCATTTATTTGC ATACATTCAATCAATTGTTATCTAAGGAAATACTTACATATGGTTCGTGCAAACAAACGCAACGAGGCTCTACGAATCGA GAGTGCGTTGCTTAACAAAATCGCAATGCTTGGAACTGAGAAGACAGCGGAAGCTGTGGGCGTTGATAAGTCGCAGATCA GCAGGTGGAAGAGGGACTGGATTCCAAAGTTCTCAATGCTGCTTGCTGTTCTTGAATGGGGGGTCGTTGACGACGACATG GCTCGATTGGCGCGACAAGTTGCTGCGATTCTCACCAATAAAAAACGCCCGGCGGCAACCGAGCGTTCTGAACAAATCCA GATGGAGTTCTGAGGTCATTACTGGATCTATCAACAGGAGTCATTATGACAAATACAGCAAAAATACTCAACTTCGGCAG AGGTAACTTTGCCGGACAGGAGCGTAATGTGGCAGATCTCGATGATGGTTACGCCAGACTATCAAATATGCTGCTTGAGG CTTATTCGGGCGCAGATCTGACCAAGCGACAGTTTAAAGTGCTGCTTGCCATTCTGCGTAAAACCTATGGGTGGAATAAA CCAATGGACAGAATCACCGATTCTCAACTTAGCGAGATTACAAAGTTACCTGTCAAACGGTGCAATGAAGCCAAGTTAGA ACTCGTCAGAATGAATATTATCAAGCAGCAAGGCGGCATGTTTGGACCAAATAAAAACATCTCAGAATGGTGCATCCCTC AAAACGAGGGAAAATCCCCTAAAACGAGGGATAAAACATCCCTCAAATTGGGGGATTGCTATCCCTCAAAACAGGGGGAC ACAAAAGACACTATTACAAAAGAAAAAAGAAAAGATTATTCGTCAGAGAATTCTGGCGAATCCTCTGACCAGCCAGAAAA CGACCTTTCTGTGGTGAAACCGGATGCTGCAATTCAGAGCGGCAGCAAGTGGGGGACAGCAGAAGACCTGACCGCCGCAG AGTGGATGTTTGACATGGTGAAGACTATCGCACCATCAGCCAGAAAACCGAATTTTGCTGGGTGGGCTAACGATATCCGC CTGATGCGTGAACGTGACGGACGTAACCACCGCGACATGTGTGTGCTGTTCCGCTGGGCATGCCAGGACAACTTCTGGTC CGGTAACGTGCTGAGCCCGGCCAAACTCCGCGATAAGTGGACCCAACTCGAAATCAACCGTAACAAGCAACAGGCAGGCG TGACAGCCAGCAAACCAAAACTCGACCTGACAAACACAGACTGGATTTACGGGGTGGATCTATGAAAAACATCGCCGCAC AGATGGTTAACTTTGACCGTGAGCAGATGCGTCGGATCGCCAACAACATGCCGGAACAGTACGACGAAAAGCCGCAGGTA CAGCAGGTAGCGCAGATCATCAACGGTGTGTTCAGCCAGTTACTGGCAACTTTCCCGGCGAGCCTGGCTAACCGTGACCA GAACGAAGTGAACGAAATCCGTCGCCAGTGGGTTCTGGCTTTTCGGGAAAACGGGATCACCACGATGGAACAGGTTAACG CAGGAATGCGCGTAGCCCGTCGGCAGAATCGACCATTTCTGCCATCACCCGGGCAGTTTGTTGCATGGTGCCGGGAAGAA GCATCCGTTACCGCCGGACTGCCAAACGTCAGCGAGCTGGTTGATATGGTTTACGAGTATTGCCGGAAGCGAGGCCTGTA TCCGGATGCGGAGTCTTATCCGTGGAAATCAAACGCGCACTACTGGCTGGTTACCAACCTGTATCAGAACATGCGGGCCA ATGCGCTTACTGATGCGGAATTACGCCGTAAGGCCGCAGATGAGCTTGTCCATATGACTGCGAGAATTAACCGTGGTGAG GCGATCCCTGAACCAGTAAAACAACTTCCTGTCATGGGCGGTAGACCTCTAAATCGTGCACAGGCTCTGGCGAAGATCGC AGAAATCAAAGCTAAGTTCGGACTGAAAGGAGCAAGTGTATGACGGGCAAAGAGGCAATTATTCATTACCTGGGGACGCA TAATAGCTTCTGTGCGCCGGACGTTGCCGCGCTAACAGGCGCAACAGTAACCAGCATAAATCAGGCCGCGGCTAAAATGG CACGGGCAGGTCTTCTGGTTATCGAAGGTAAGGTCTGGCGAACGGTGTATTACCGGTTTGCTACCAGGGAAGAACGGGAA GGAAAGATGAGCACGAACCTGGTTTTTAAGGAGTGTCGCCAGAGTGCCGCGATGAAACGGGTATTGGCGGTATATGGAGT TAAAAGATGACCATCTACATTACTGAGCTAATAACAGGCCTGCTGGTAATCGCAGGCCTTTTTATTTGGGGGAGAGGGAA GTCATGAAAAAACTAACCTTTGAAATTCGATCTCCAGCACATCAGCAAAACGCTATTCACGCAGTACAGCAAATCCTTCC AGACCCAACCAAACCAATCGTAGTAACCATTCAGGAACGCAACCGCAGCTTAGACCAAAACAGGAAGCTATGGGCCTGCT TAGGTGACGTCTCTCGTCAGGTTGAATGGCATGGTCGCTGGCTGGATGCAGAAAGCTGGAAGTGTGTGTTTACCGCAGCA TTAAAGCAGCAGGATGTTGTTCCTAACCTTGCCGGGAATGGCTTTGTGGTAATAGGCCAGTCAACCAGCAGGATGCGTGT AGGCGAATTTGCGGAGCTATTAGAGCTTATACAGGCATTCGGTACAGAGCGTGGCGTTAAGTGGTCAGACGAAGCGAGAC TGGCTCTGGAGTGGAAAGCGAGATGGGGAGACAGGGCTGCATGATAAATGTCGTTAGTTTCTCCGGTGGCAGGACGTCAG CATATTTGCTCTGGCTAATGGAGCAAAAGCGACGGGCAGGTAAAGACGTGCATTACGTTTTCATGGATACAGGTTGTGAA CATCCAATGACATATCGGTTTGTCAGGGAAGTTGTGAAGTTCTGGGATATACCGCTCACCGTATTGCAGGTTGATATCAA CCCGGAGCTTGGACAGCCAAATGGTTATACGGTATGGGAACCAAAGGATATTCAGACGCGAATGCCTGTTCTGAAGCCAT TTATCGATATGGTAAAGAAATATGGCACTCCATACGTCGGCGGCGCGTTCTGCACTGACAGATTAAAACTCGTTCCCTTC ACCAAATACTGTGATGACCATTTCGGGCGAGGGAATTACACCACGTGGATTGGCATCAGAGCTGATGAACCGAAGCGGCT AAAGCCAAAGCCTGGAATCAGATATCTTGCTGAACTGTCAGACTTTGAGAAGGAAGATATCCTCGCATGGTGGAAGCAAC AACCATTCGATTTGCAAATACCGGAACATCTCGGTAACTGCATATTCTGCATTAAAAAATCAACGCAAAAAATCGGACTT GCCTGCAAAGATGAGGAGGGATTGCAGCGTGTTTTTAATGAGGTCATCACGGGATCCCATGTGCGTGACGGACATCGGGA AACGCCAAAGGAGATTATGTACCGAGGAAGAATGTCGCTGGACGGTATCGCGAAAATGTATTCAGAAAATGATTATCAAG CCCTGTATCAGGACATGGTACGAGCTAAAAGATTCGATACCGGCTCTTGTTCTGAGTCATGCGAAATATTTGGAGGGCAG CTTGATTTCGACTTCGGGAGGGAAGCTGCATGATGCGATGTTATCGGTGCGGTGAATGCAAAGAAGATAACCGCTTCCGA CCAAATCAACCTTACTGGAATCGATGGTGTCTCCGGTGTGAAAGAACACCAACAGGGGTGTTACCACTACCGCAGGAAAA GGAGGACGTGTGGCGAGACAGCGACGAAGTATCACCGACATAATCTGCGAAAACTGCAAATACCTTCCAACGAAACGCAC CAGAAATAAACCCAAGCCAATCCCAAAAGAATCTGACGTAAAAACCTTCAACTACACGGCTCACCTGTGGGATATCCGGT GGCTAAGACGTCGTGCGAGGAAAACAAGGTGATTGACCAAAATCGAAGTTACGAACAAGAAAGCGTCGAGCGAGCTTTAA CGTGCGCTAACTGCGGTCAGAAGCTGCATGTGCTGGAAGTTCACGTGTGTGAGCACTGCTGCGCAGAACTGATGAGCGAT CCGAATAGCTCGATGCACGAGGAAGAAGATGATGGCTAAACCAGCGCGAAGACGATGTAAAAACGATGAATGCCGGGAAT GGTTTCACCCTGCATTCGCTAATCAGTGGTGGTGCTCTCCAGAGTGTGGAACCAAGATAGCACTCGAACGACGAAGTAAA GAACGCGAAAAAGCGGAAAAAGCAGCAGAGAAGAAACGACGACGAGAGGAGCAGAAACAGAAAGATAAACTTAAGATTCG AAAACTCGCCTTAAAGCCCCGCAGTTACTGGATTAAACAAGCCCAACAAGCCGTAAACGCCTTCATCAGAGAAAGAGACC GCGACTTACCATGTATCTCGTGCGGAACGCTCACGTCTGCTCAGTGGGATGCCGGACATTACCGGACAACTGCTGCGGCA CCTCAACTCCGATTTAATGAACGCAATATTCACAAGCAATGCGTGGTGTGCAACCAGCACAAAAGCGGAAATCTCGTTCC GTATCGCGTCGAACTGATTAGCCGCATCGGGCAGGAAGCAGTAGACGAAATCGAATCAAACCATAACCGCCATCGCTGGA CTATCGAAGAGTGCAAGGCGATCAAGGCAGAGTACCAACAGAAACTCAAAGACCTGCGAAATAGCAGAAGTGAGGCCGCA TGACGTTCTCAGTAAAAACCATTCCAGACATGCTCGTTGAAGCATACGGAAATCAGACAGAAGTAGCACGCAGACTGAAA TGTAGTCGCGGTACGGTCAGAAAATACGTTGATGATAAAGACGGGAAAATGCACGCCATCGTCAACGACGTTCTCATGGT TCATCGCGGATGGAGTGAAAGAGATGCGCTATTACGAAAAAATTGATGGCAGCAAATACCGAAATATTTGGGTAGTTGGC GATCTGCACGGATGCTACACGAACCTGATGAACAAACTGGATACGATTGGATTCGACAACAAAAAAGACCTGCTTATCTC GGTGGGCGATTTGGTTGATCGTGGTGCAGAGAACGTTGAATGCCTGGAATTAATCACATTCCCCTGGTTCAGAGCTGTAC GTGGAAACCATGAGCAAATGATGATTGATGGCTTATCAGAGCGTGGAAACGTTAATCACTGGCTGCTTAATGGCGGTGGC TGGTTCTTTAATCTCGATTACGACAAAGAAATTCTGGCTAAAGCTCTTGCCCATAAAGCAGATGAACTTCCGTTAATCAT CGAACTGGTGAGCAAAGATAAAAAATATGTTATCTGCCACGCCGATTATCCCTTTGACGAATACGAGTTTGGAAAGCCAG TTGATCATCAGCAGGTAATCTGGAACCGCGAACGAATCAGCAACTCACAAAACGGGATCGTGAAAGAAATCAAAGGCGCG GACACGTTCATCTTTGGTCATACGCCAGCAGTGAAACCACTCAAGTTTGCCAACCAAATGTATATCGATACCGGCGCAGT GTTCTGCGGAAACCTAACATTGATTCAGGTACAGGGAGAAGGCGCATGAGACTCGAAAGCGTAGCTAAATTTCATTCGCC AAAAAGCCCGATGATGAGCGACTCACCACGGGCCACGGCTTCTGACTCTCTTTCCGGTACTGATGTGATGGCTGCTATGG GGATGGCGCAATCACAAGCCGGATTCGGTATGGCTGCATTCTGCGGTAAGCACGAACTCAGCCAGAACGACAAACAAAAG GCTATCAACTATCTGATGCAATTTGCACACAAGGTATCGGGGAAATACCGTGGTGTGGCAAAGCTTGAAGGAAATACTAA GGCAAAGGTACTGCAAGTGCTCGCAACATTCGCTTATGCGGATTATTGCCGTAGTGCCGCGACGCCGGGGGCAAGATGCA GAGATTGCCATGGTACAGGCCGTGCGGTTGATATTGCCAAAACAGAGCTGTGGGGGAGAGTTGTCGAGAAAGAGTGCGGA AGATGCAAAGGCGTCGGCTATTCAAGGATGCCAGCAAGCGCAGCATATCGCGCTGTGACGATGCTAATCCCAAACCTTAC CCAACCCACCTGGTCACGCACTGTTAAGCCGCTGTATGACGCTCTGGTGGTGCAATGCCACAAAGAAGAGTCAATCGCAG ACAACATTTTGAATGCGGTCACACGTTAGCAGCATGATTGCCACGGATGGCAACATATTAACGGCATGATATTGACTTAT TGAATAAAATTGGGTAAATTTGACTCAACGATGGGTTAATTCGCTCGTTGTGGTAGTGAGATGAAAAGAGGCGGCGCTTA CTACCGATTCCGCCTAGTTGGTCACTTCGACGTATCGTCTGGAACTCCAACCATCGCAGGCAGAGAGGTCTGCAAAATGC AATCCCGAAACAGTTCGCAGGTAATAGTTAGAGCCTGCATAACGGTTTCGGGATTTTTTATATCTGCACAACAGGTAAGA GCATTGAGTCGATAATCGTGAAGAGTCGGCGAGCCTGGTTAGCCAGTGCTCTTTCCGTTGTGCTGAATTAAGCGAATACC GGAAGCAGAACCGGATCACCAAATGCGTACAGGCGTCATCGCCGCCCAGCAACAGCACAACCCAAACTGAGCCGTAGCCA CTGTCTGTCCTGAATTCATTAGTAATAGTTACGCTGCGGCCTTTTACACATGACCTTCGTGAAAGCGGGTGGCAGGAGGT CGCGCTAACAACCTCCTGCCGTTTTGCCCGTGCATATCGGTCACGAACAAATCTGATTACTAAACACAGTAGCCTGGATT TGTTCTATCAGTAATCGACCTTATTCCTAATTAAATAGAGCAAATCCCCTTATTGGGGGTAAGACATGAAGATGCCAGAA AAACATGACCTGTTGGCCGCCATTCTCGCGGCAAAGGAACAAGGCATCGGGGCAATCCTTGCGTTTGCAATGGCGTACCT TCGCGGCAGATATAATGGCGGTGCGTTTACAAAAACAGTAATCGACGCAACGATGTGCGCCATTATCGCCTGGTTCATTC GTGACCTTCTCGACTTCGCCGGACTAAGTAGCAATCTCGCTTATATAACGAGCGTGTTTATCGGCTACATCGGTACTGAC TCGATTGGTTCGCTTATCAAACGCTTCGCTGCTAAAAAAGCCGGAGTAGAAGATGGTAGAAATCAATAATCAACGTAAGG CGTTCCTCGATATGCTGGCGTGGTCGGAGGGAACTGATAACGGACGTCAGAAAACCAGAAATCATGGTTATGACGTCATT GTAGGCGGAGAGCTATTTACTGATTACTCCGATCACCCTCGCAAACTTGTCACGCTAAACCCAAAACTCAAATCAACAGG CGCCGGACGCTACCAGCTTCTTTCCCGTTGGTGGGATGCCTACCGCAAGCAGCTTGGCCTGAAAGACTTCTCTCCGAAAA GTCAGGACGCTGTGGCATTGCAGCAGATTAAGGAGCGTGGCGCTTTACCTATGATTGATCGTGGTGATATCCGTCAGGCA ATCGACCGTTGCAGCAATATCTGGGCTTCACTGCCGGGCGCTGGTTATGGTCAGTTCGAGCATAAGGCTGACAGCCTGAT TGCAAAATTCAAAGAAGCGGGCGGAACGGTCAGAGAGATTGATGTATGAGCAGAGTCACCGCGATTATCTCCGCTCTGGT TATCTGCATCATCGTCTGCCTGTCATGGGCTGTTAATCATTACCGTGATAACGCCATTACCTACAAAGCCCAGCGCGACA AAAATGCCAGAGAACTGAAGCTGGCGAACGCGGCAATTACTGACATGCAGATGCGTCAGCGTGATGTTGCTGCGCTCGAT GCAAAATACACGAAGGAGTTAGCTGATGCTAAAGCTGAAAATGATGCTCTGCGTGATGATGTTGCCGCTGGTCGTCGTCG GTTGCACATCAAAGCAGTCTGTCAGTCAGTGCGTGAAGCCACCACCGCCTCCGGCGTGGATAATGCAGCCTCCCCCCGAC TGGCAGACACCGCTGAACGGGATTATTTCACCCTCAGAGAGAGGCTGATCACTATGCAAAAACAACTGGAAGGAACCCAG AAGTATATTAATGAGCAGTGCAGATAGAGTTGCCCATATCGATGGGCAACTCATGCAATTATTGTGAGCAATACACACGC GCTTCCAGCGGAGTATAAATGCCTAAAGTAATAAAACCGAGCAATCCATTTACGAATGTTTGCTGGGTTTCTGTTTTAAC AACATTTTCTGCGCCGCCACAAATTTTGGCTGCATCGACAGTTTTCTTCTGCCCAATTCCAGAAACGAAGAAATGATGGG TGATGGTTTCCTTTGGTGCTACTGCTGCCGGTTTGTTTTGAACAGTAAACGTCTGTTGAGCACATCCTGTAATAAGCAGG GCCAGCGCAGTAGCGAGTAGCATTTTTTTCATGGTGTTATTCCCGATGCTTTTTGAAGTTCGCAGAATCGTATGTGTAGA AAATTAAACAAACCCTAAACAATGAGTTGAAATTTCATATTGTTAATATTTATTAATGTATGTCAGGTGCGATGAATCGT CATTGTATTCCCGGATTAACTATGTCCACAGCCCTGACGGGGAACTTCTCTGCGGGAGTGTCCGGGAATAATTAAAACGA TGCACACAGGGTTTAGCGCGTACACGTATTGCATTATGCCAACGCCCCGGTGCTGACACGGAAGAAACCGGACGTTATGA TTTAGCGTGGAAAGATTTGTGTAGTGTTCTGAATGCTCTCAGTAAATAGTAATGAATTATCAAAGGTATAGTAATATCTT TTATGTTCATGGATATTTGTAACCCATCGGAAAACTCCTGCTTTAGCAAGATTTTCCCTGTATTGCTGAAATGTGATTTC TCTTGATTTCAACCTATCATAGGACGTTTCTATAAGATGCGTGTTTCTTGAGAATTTAACATTTACAACCTTTTTAAGTC CTTTTATTAACACGGTGTTATCGTTTTCTAACACGATGTGAATATTATCTGTGGCTAGATAGTAAATATAATGTGAGACG TTGTGACGTTTTAGTTCAGAATAAAACAATTCACAGTCTAAATCTTTTCGCACTTGATCGAATATTTCTTTAAAAATGGC AACCTGAGCCATTGGTAAAACCTTCCATGTGATACGAGGGCGCGTAGTTTGCATTATCGTTTTTATCGTTTCAATCTGGT CTGACCTCCTTGTGTTTTGTTGATGATTTATGTCAAATATTAGGAATGTTTTCACTTAATAGTATTGGTTGCGTAACAAA GTGCGGTCCTGCTGGCATTCTGGAGGGAAATACAACCGACAGATGTATGTAAGGCCAACGTGCTCAAATCTTCATACAGA AAGATTTGAAGTAATATTTTAACCGCTAGATGAAGAGCAAGCGCATGGAGCGACAAAATGAATAAAGAACAATCTGCTGA TGATCCCTCCGTGGATCTGATTCGTGTAAAAAATATGCTTAATAGCACCATTTCTATGAGTTACCCTGATGTTGTAATTG CATGTATAGAACATAAGGTGTCTCTGGAAGCATTCAGAGCAATTGAGGCAGCGTTGGTGAAGCACGATAATAATATGAAG GATTATTCCCTGGTGGTTGACTGATCACCATAACTGCTAATCATTCAAACTATTTAGTCTGTGACAGAGCCAACACGCAG TCTGTCACTGTCAGGAAAGTGGTAAAACTGCAACTCAATTACTGCAATGCCCTCGTAATTAAGTGAATTTACAATATCGT CCTGTTCGGAGGGAAGAACGCGGGATGTTCATTCTTCATCACTTTTAATTGATGTATATGCTCTCTTTTCTGACGTTAGT CTCCGACGGCAGGCTTCAATGACCCAGGCTGAGAAATTCCCGGACCCTTTTTGCTCAAGAGCGATGTTAATTTGTTCAAT CATTTGGTTAGGAAAGCGGATGTTGCGGGTTGTTGTTCTGCGGGTTCTGTTCTTCGTTGACATGAGGTTGCCCCGTATTC AGTGTCGCTGATTTGTATTGTCTGAAGTTGTTTTTACGTTAAGTTGATGCAGATCAATTAATACGATACCTGCGTCATAA TTGATTATTTGACGTGGTTTGATGGCCTCCACGCACGTTGTGATATGTAGATGATAATCATTATCACTTTACGGGTCCTT TCCGGTGATCCGACAGGTTACG kraken2-2.1.2/data/MERS.fa000066400000000000000000000736001404614725400150550ustar00rootroot00000000000000>kraken:taxid|1335626|NC_019843.3 Middle East respiratory syndrome coronavirus, complete genome GATTTAAGTGAATAGCTTGGCTATCTCACTTCCCCTCGTTCTCTTGCAGAACTTTGATTTTAACGAACTTAAATAAAAGC CCTGTTGTTTAGCGTATCGTTGCACTTGTCTGGTGGGATTGTGGCATTAATTTGCCTGCTCATCTAGGCAGTGGACATAT GCTCAACACTGGGTATAATTCTAATTGAATACTATTTTTCAGTTAGAGCGTCGTGTCTCTTGTACGTCTCGGTCACAATA CACGGTTTCGTCCGGTGCGTGGCAATTCGGGGCACATCATGTCTTTCGTGGCTGGTGTGACCGCGCAAGGTGCGCGCGGT ACGTATCGAGCAGCGCTCAACTCTGAAAAACATCAAGACCATGTGTCTCTAACTGTGCCACTCTGTGGTTCAGGAAACCT GGTTGAAAAACTTTCACCATGGTTCATGGATGGCGAAAATGCCTATGAAGTGGTGAAGGCCATGTTACTTAAAAAGGAGC CACTTCTCTATGTGCCCATCCGGCTGGCTGGACACACTAGACACCTCCCAGGTCCTCGTGTGTACCTGGTTGAGAGGCTC ATTGCTTGTGAAAATCCATTCATGGTTAACCAATTGGCTTATAGCTCTAGTGCAAATGGCAGCCTGGTTGGCACAACTTT GCAGGGCAAGCCTATTGGTATGTTCTTCCCTTATGACATCGAACTTGTCACAGGAAAGCAAAATATTCTCCTGCGCAAGT ATGGCCGTGGTGGTTATCACTACACCCCATTCCACTATGAGCGAGACAACACCTCTTGCCCTGAGTGGATGGACGATTTT GAGGCGGATCCTAAAGGCAAATATGCCCAGAATCTGCTTAAGAAGTTGATTGGCGGTGATGTCACTCCAGTTGACCAATA CATGTGTGGCGTTGATGGAAAACCCATTAGTGCCTACGCATTTTTAATGGCCAAGGATGGAATAACCAAACTGGCTGATG TTGAAGCGGACGTCGCAGCACGTGCTGATGACGAAGGCTTCATCACATTAAAGAACAATCTATATAGATTGGTTTGGCAT GTTGAGCGTAAAGACGTTCCATATCCTAAGCAATCTATTTTTACTATTAATAGTGTGGTCCAAAAGGATGGTGTTGAAAA CACTCCTCCTCACTATTTTACTCTTGGATGCAAAATTTTAACGCTCACCCCACGCAACAAGTGGAGTGGCGTTTCTGACT TGTCCCTCAAACAAAAACTCCTTTACACCTTCTATGGTAAGGAGTCACTTGAGAACCCAACCTACATTTACCACTCCGCA TTCATTGAGTGTGGAAGTTGTGGTAATGATTCCTGGCTTACAGGGAATGCTATCCAAGGGTTTGCCTGTGGATGTGGGGC ATCATATACAGCTAATGATGTCGAAGTCCAATCATCTGGCATGATTAAGCCAAATGCTCTTCTTTGTGCTACTTGCCCCT TTGCTAAGGGTGATAGCTGTTCTTCTAATTGCAAACATTCAGTTGCTCAGTTGGTTAGTTACCTTTCTGAACGCTGTAAT GTTATTGCTGATTCTAAGTCCTTCACACTTATCTTTGGTGGCGTAGCTTACGCCTACTTTGGATGTGAGGAAGGTACTAT GTACTTTGTGCCTAGAGCTAAGTCTGTTGTCTCAAGGATTGGAGACTCCATCTTTACAGGCTGTACTGGCTCTTGGAACA AGGTCACTCAAATTGCTAACATGTTCTTGGAACAGACTCAGCATTCCCTTAACTTTGTGGGAGAGTTCGTTGTCAACGAT GTTGTCCTCGCAATTCTCTCTGGAACCACAACTAATGTTGACAAAATACGCCAGCTTCTCAAAGGTGTCACCCTTGACAA GTTGCGTGATTATTTAGCTGACTATGACGTAGCAGTCACTGCCGGCCCATTCATGGATAATGCTATTAATGTTGGTGGTA CAGGATTACAGTATGCCGCCATTACTGCACCTTATGTAGTTCTCACTGGCTTAGGTGAGTCCTTTAAGAAAGTTGCAACC ATACCGTATAAGGTTTGCAACTCTGTTAAGGATACTCTGGCTTATTATGCTCACAGCGTGTTGTACAGAGTTTTTCCTTA TGACATGGATTCTGGTGTGTCATCCTTTAGTGAACTACTTTTTGATTGCGTTGATCTTTCAGTAGCTTCTACCTATTTTT TAGTCCGCATCTTGCAAGATAAGACTGGCGACTTTATGTCTACAATTATTACTTCCTGCCAAACTGCTGTTAGTAAGCTT CTAGATACATGTTTTGAAGCTACAGAAGCAACATTTAACTTCTTGTTAGATTTGGCAGGATTGTTCAGAATCTTTCTCCG CAATGCCTATGTGTACACTTCACAAGGGTTTGTGGTGGTCAATGGCAAAGTTTCTACACTTGTCAAACAAGTGTTAGACT TGCTTAATAAGGGTATGCAACTTTTGCATACAAAGGTCTCCTGGGCTGGTTCTAAAATCATTGCTGTTATCTACAGCGGC AGGGAGTCTCTAATATTCCCATCGGGAACCTATTACTGTGTCACCACTAAGGCTAAGTCCGTTCAACAAGATCTTGACGT TATTTTGCCTGGTGAGTTTTCCAAGAAGCAGTTAGGACTGCTCCAACCTACTGACAATTCTACAACTGTTAGTGTTACTG TATCCAGTAACATGGTTGAAACTGTTGTGGGTCAACTTGAGCAAACTAATATGCATAGTCCTGATGTTATAGTAGGTGAC TATGTCATTATTAGTGAAAAATTGTTTGTGCGTAGTAAGGAAGAAGACGGATTTGCCTTCTACCCTGCTTGCACTAATGG TCATGCTGTACCGACTCTCTTTAGACTTAAGGGAGGTGCACCTGTAAAAAAAGTAGCCTTTGGCGGTGATCAAGTACATG AGGTTGCTGCTGTAAGAAGTGTTACTGTCGAGTACAACATTCATGCTGTATTAGACACACTACTTGCTTCTTCTAGTCTT AGAACCTTTGTTGTAGATAAGTCTTTGTCAATTGAGGAGTTTGCTGACGTAGTAAAGGAACAAGTCTCAGACTTGCTTGT TAAATTACTGCGTGGAATGCCGATTCCAGATTTTGATTTAGACGATTTTATTGACGCACCATGCTATTGCTTTAACGCTG AGGGTGATGCATCCTGGTCTTCTACTATGATCTTCTCTCTTCACCCCGTCGAGTGTGACGAGGAGTGTTCTGAAGTAGAG GCTTCAGATTTAGAAGAAGGTGAATCAGAGTGCATTTCTGAGACTTCAACTGAACAAGTTGACGTTTCTCATGAGACTTC TGACGACGAGTGGGCTGCTGCAGTTGATGAAGCGTTCCCTCTCGATGAAGCAGAAGATGTTACTGAATCTGTGCAAGAAG AAGCACAACCAGTAGAAGTACCTGTTGAAGATATTGCGCAGGTTGTCATAGCTGACACCTTACAGGAAACTCCTGTTGTG CCTGATACTGTTGAAGTCCCACCGCAAGTGGTGAAACTTCCGTCTGCACCTCAGACTATCCAGCCCGAGGTAAAAGAAGT TGCACCTGTCTATGAGGCTGATACCGAACAGACACAGAATGTTACTGTTAAACCTAAGAGGTTACGCAAAAAGCGTAATG TTGACCCTTTGTCCAATTTTGAACATAAGGTTATTACAGAGTGCGTTACCATAGTTTTAGGTGACGCAATTCAAGTAGCC AAGTGCTATGGGGAGTCTGTGTTAGTTAATGCTGCTAACACACATCTTAAGCATGGCGGTGGTATCGCTGGTGCTATTAA TGCGGCTTCAAAAGGGGCTGTCCAAAAAGAGTCAGATGAGTATATTCTGGCTAAAGGGCCGTTACAAGTAGGAGATTCAG TTCTCTTGCAAGGCCATTCTCTAGCTAAGAATATCCTGCATGTCGTAGGCCCAGATGCCCGCGCTAAACAGGATGTTTCT CTCCTTAGTAAGTGCTATAAGGCTATGAATGCATATCCTCTTGTAGTCACTCCTCTTGTTTCAGCAGGCATATTTGGTGT AAAACCAGCTGTGTCTTTTGATTATCTTATTAGGGAGGCTAAGACTAGAGTTTTAGTCGTCGTTAATTCCCAAGATGTCT ATAAGAGTCTTACCATAGTTGACATTCCACAGAGTTTGACTTTTTCATATGATGGGTTACGTGGCGCAATACGTAAAGCT AAAGATTATGGTTTTACTGTTTTTGTGTGCACAGACAACTCTGCTAACACTAAAGTTCTTAGGAACAAGGGTGTTGATTA TACTAAGAAGTTTCTTACAGTTGACGGTGTGCAATATTATTGCTACACGTCTAAGGACACTTTAGATGATATCTTACAAC AGGCTAATAAGTCTGTTGGTATTATATCTATGCCTTTGGGATATGTGTCTCATGGTTTAGACTTAATGCAAGCAGGGAGT GTCGTGCGTAGAGTTAACGTGCCCTACGTGTGTCTCCTAGCTAATAAAGAGCAAGAAGCTATTTTGATGTCTGAAGACGT TAAGTTAAACCCTTCAGAAGATTTTATAAAGCACGTCCGCACTAATGGTGGTTACAATTCTTGGCATTTAGTCGAGGGTG AACTATTGGTGCAAGACTTACGCTTAAATAAGCTCCTGCATTGGTCTGATCAAACCATATGCTACAAGGATAGTGTGTTT TATGTTGTAAAGAATAGTACAGCTTTTCCATTTGAAACACTTTCAGCATGTCGTGCGTATTTGGATTCACGCACGACACA GCAGTTAACAATCGAAGTCTTAGTGACTGTCGATGGTGTAAATTTTAGAACAGTCGTTCTAAATAATAAGAACACTTATA GATCACAGCTTGGATGCGTTTTCTTTAATGGTGCTGATATTTCTGACACCATTCCTGATGAGAAACAGAATGGTCACAGT TTATATCTAGCAGACAATTTGACTGCTGATGAAACAAAGGCGCTTAAAGAGTTATATGGCCCCGTTGATCCTACTTTCTT ACACAGATTCTATTCACTTAAGGCTGCAGTCCATGGGTGGAAGATGGTTGTGTGTGATAAGGTACGTTCTCTCAAATTGA GTGATAATAATTGTTATCTTAATGCAGTTATTATGACACTTGATTTATTGAAGGACATTAAATTTGTTATACCTGCTCTA CAGCATGCATTTATGAAACATAAGGGCGGTGATTCAACTGACTTCATAGCCCTCATTATGGCTTATGGCAATTGCACATT TGGTGCTCCAGATGATGCCTCTCGGTTACTTCATACCGTGCTTGCAAAGGCTGAGTTATGCTGTTCTGCACGCATGGTTT GGAGAGAGTGGTGCAATGTCTGTGGCATAAAAGATGTTGTTCTACAAGGCTTAAAAGCTTGTTGTTACGTGGGTGTGCAA ACTGTTGAAGATCTGCGTGCTCGCATGACATATGTATGCCAGTGTGGTGGTGAACGTCATCGGCAATTAGTCGAACACAC CACCCCCTGGTTGCTGCTCTCAGGCACACCAAATGAAAAATTGGTGACAACCTCCACGGCGCCTGATTTTGTAGCATTTA ATGTCTTTCAGGGCATTGAAACGGCTGTTGGCCATTATGTTCATGCTCGCCTGAAGGGTGGTCTTATTTTAAAGTTTGAC TCTGGCACCGTTAGCAAGACTTCAGACTGGAAGTGCAAGGTGACAGATGTACTTTTCCCCGGCCAAAAATACAGTAGCGA TTGTAATGTCGTACGGTATTCTTTGGACGGTAATTTCAGAACAGAGGTTGATCCCGACCTATCTGCTTTCTATGTTAAGG ATGGTAAATACTTTACAAGTGAACCACCCGTAACATATTCACCAGCTACAATTTTAGCTGGTAGTGTCTACACTAATAGC TGCCTTGTATCGTCTGATGGACAACCTGGCGGTGATGCTATTAGTTTGAGTTTTAATAACCTTTTAGGGTTTGATTCTAG TAAACCAGTCACTAAGAAATACACTTACTCCTTCTTGCCTAAAGAAGACGGCGATGTGTTGTTGGCTGAGTTTGACACTT ATGACCCTATTTATAAGAATGGTGCCATGTATAAAGGCAAACCAATTCTTTGGGTCAATAAAGCATCTTATGATACTAAT CTTAATAAGTTCAATAGAGCTAGTTTGCGTCAAATTTTTGACGTAGCCCCCATTGAACTCGAAAATAAATTCACACCTTT GAGTGTGGAGTCTACACCAGTTGAACCTCCAACTGTAGATGTGGTAGCACTTCAACAGGAAATGACAATTGTCAAATGTA AGGGTTTAAATAAACCTTTCGTGAAGGACAATGTCAGTTTCGTTGCTGATGATTCAGGTACTCCCGTTGTTGAGTATCTG TCTAAAGAAGACCTACATACATTGTATGTAGACCCTAAGTATCAAGTCATTGTCTTAAAAGACAATGTACTTTCTTCTAT GCTTAGATTGCACACCGTTGAGTCAGGTGATATTAACGTTGTTGCAGCTTCCGGATCTTTGACACGTAAAGTGAAGTTAC TATTTAGGGCTTCATTTTATTTCAAAGAATTTGCTACCCGCACTTTCACTGCTACCACTGCTGTAGGTAGTTGTATAAAG AGTGTAGTGCGGCATCTAGGTGTTACTAAAGGCATATTGACAGGCTGTTTTAGTTTTGCCAAGATGTTATTTATGCTTCC ACTAGCTTACTTTAGTGATTCAAAACTCGGCACCACAGAGGTTAAAGTGAGTGCTTTGAAAACAGCCGGCGTTGTGACAG GTAATGTTGTAAAACAGTGTTGCACTGCTGCTGTTGATTTAAGTATGGATAAGTTGCGCCGTGTGGATTGGAAATCAACC CTACGGTTGTTACTTATGTTATGCACAACTATGGTATTGTTGTCTTCTGTGTATCACTTGTATGTCTTCAATCAGGTCTT ATCAAGTGATGTTATGTTTGAAGATGCCCAAGGTTTGAAAAAGTTCTACAAAGAAGTTAGAGCTTACCTAGGAATCTCTT CTGCTTGTGACGGTCTTGCTTCAGCTTATAGGGCGAATTCCTTTGATGTACCTACATTCTGCGCAAACCGTTCTGCAATG TGTAATTGGTGCTTGATTAGCCAAGATTCCATAACTCACTACCCAGCTCTTAAGATGGTTCAAACACATCTTAGCCACTA TGTTCTTAACATAGATTGGTTGTGGTTTGCATTTGAGACTGGTTTGGCATACATGCTCTATACCTCGGCCTTCAACTGGT TGTTGTTGGCAGGTACATTGCATTATTTCTTTGCACAGACTTCCATATTTGTAGACTGGCGGTCATACAATTATGCTGTG TCTAGTGCCTTCTGGTTATTCACCCACATTCCAATGGCGGGTTTGGTACGAATGTATAATTTGTTAGCATGCCTTTGGCT TTTACGCAAGTTTTATCAGCATGTAATCAATGGTTGCAAAGATACGGCATGCTTGCTCTGCTATAAGAGGAACCGACTTA CTAGAGTTGAAGCTTCTACCGTTGTCTGTGGTGGAAAACGTACGTTTTATATCACAGCAAATGGCGGTATTTCATTCTGT CGTAGGCATAATTGGAATTGTGTGGATTGTGACACTGCAGGTGTGGGGAATACCTTCATCTGTGAAGAAGTCGCAAATGA CCTCACTACCGCCCTACGCAGGCCTATTAACGCTACGGATAGATCACATTATTATGTGGATTCCGTTACAGTTAAAGAGA CTGTTGTTCAGTTTAATTATCGTAGAGACGGTCAACCATTCTACGAGCGGTTTCCCCTCTGCGCTTTTACAAATCTAGAT AAGTTGAAGTTCAAAGAGGTCTGTAAAACTACTACTGGTATACCTGAATACAACTTTATCATCTACGACTCATCAGATCG TGGCCAGGAAAGTTTAGCTAGGTCTGCATGTGTTTATTATTCTCAAGTCTTGTGTAAATCAATTCTTTTGGTTGACTCAA GTTTGGTTACTTCTGTTGGTGATTCTAGTGAAATCGCCACTAAAATGTTTGATTCCTTTGTTAATAGTTTCGTCTCGCTG TATAATGTCACACGCGATAAGTTGGAAAAACTTATCTCTACTGCTCGTGATGGCGTAAGGCGAGGCGATAACTTCCATAG TGTCTTAACAACATTCATTGACGCAGCACGAGGCCCCGCAGGTGTGGAGTCTGATGTTGAGACCAATGAAATTGTTGACT CTGTGCAGTATGCTCATAAACATGACATACAAATTACTAATGAGAGCTACAATAATTATGTACCCTCATATGTTAAACCT GATAGTGTGTCTACCAGCGATTTAGGTAGTCTCATTGATTGTAATGCGGCTTCAGTTAACCAAATTGTCTTGCGTAATTC TAATGGTGCTTGCATTTGGAACGCTGCTGCATATATGAAACTCTCGGATGCACTTAAACGACAGATTCGCATTGCATGCC GTAAGTGTAATTTAGCTTTCCGGTTAACCACCTCAAAGCTACGCGCTAATGATAATATCTTATCAGTTAGATTCACTGCT AACAAAATTGTTGGTGGTGCTCCTACATGGTTTAATGCGTTGCGTGACTTTACGTTAAAGGGTTATGTTCTTGCTACCAT TATTGTGTTTCTGTGTGCTGTACTGATGTATTTGTGTTTACCTACATTTTCTATGGCACCTGTTGAATTTTATGAAGACC GCATCTTGGACTTTAAAGTTCTTGATAATGGTATCATTAGGGATGTAAATCCTGATGATAAGTGCTTTGCTAATAAGCAC CGGTCCTTCACACAATGGTATCATGAGCATGTTGGTGGTGTCTATGACAACTCTATCACATGCCCATTGACAGTTGCAGT AATTGCTGGAGTTGCTGGTGCTCGCATTCCAGACGTACCTACTACATTGGCTTGGGTGAACAATCAGATAATTTTCTTTG TTTCTCGAGTCTTTGCTAATACAGGCAGTGTTTGCTACACTCCTATAGATGAGATACCCTATAAGAGTTTCTCTGATAGT GGTTGCATTCTTCCATCTGAGTGCACTATGTTTAGGGATGCAGAGGGCCGTATGACACCATACTGCCATGATCCTACTGT TTTGCCTGGGGCTTTTGCGTACAGTCAGATGAGGCCTCATGTTCGTTACGACTTGTATGATGGTAACATGTTTATTAAAT TTCCTGAAGTAGTATTTGAAAGTACACTTAGGATTACTAGAACTCTGTCAACTCAGTACTGCCGGTTCGGTAGTTGTGAG TATGCACAAGAGGGTGTTTGTATTACCACAAATGGCTCGTGGGCCATTTTTAATGACCACCATCTTAATAGACCTGGTGT CTATTGTGGCTCTGATTTTATTGACATTGTCAGGCGGTTAGCAGTATCACTGTTCCAGCCTATTACTTATTTCCAATTGA CTACCTCATTGGTCTTGGGTATAGGTTTGTGTGCGTTCCTGACTTTGCTCTTCTATTATATTAATAAAGTAAAACGTGCT TTTGCAGATTACACCCAGTGTGCTGTAATTGCTGTTGTTGCTGCTGTTCTTAATAGCTTGTGCATCTGCTTTGTTACCTC TATACCATTGTGTATAGTACCTTACACTGCATTGTACTATTATGCTACATTCTATTTTACTAATGAGCCTGCATTTATTA TGCATGTTTCTTGGTACATTATGTTCGGGCCTATCGTTCCCATATGGATGACCTGCGTCTATACAGTTGCAATGTGCTTT AGACACTTCTTCTGGGTTTTAGCTTATTTTAGTAAGAAACATGTAGAAGTTTTTACTGATGGTAAGCTTAATTGTAGTTT CCAGGACGCTGCCTCTAATATCTTTGTTATTAACAAGGACACTTATGCAGCTCTTAGAAACTCTTTAACTAATGATGCCT ATTCACGATTTTTGGGGTTGTTTAACAAGTATAAGTACTTCTCTGGTGCTATGGAAACAGCCGCTTATCGTGAAGCTGCA GCATGTCATCTTGCTAAAGCCTTACAAACATACAGCGAGACTGGTAGTGATCTTCTTTACCAACCACCCAACTGTAGCAT AACCTCTGGCGTGTTGCAAAGCGGTTTGGTGAAAATGTCACATCCCAGTGGAGATGTTGAGGCTTGTATGGTTCAGGTTA CCTGCGGTAGCATGACTCTTAATGGTCTTTGGCTTGACAACACAGTCTGGTGCCCACGACACGTAATGTGCCCGGCTGAC CAGTTGTCTGATCCTAATTATGATGCCTTGTTGATTTCTATGACTAATCATAGTTTCAGTGTGCAAAAACACATTGGCGC TCCAGCAAACTTGCGTGTTGTTGGTCATGCCATGCAAGGCACTCTTTTGAAGTTGACTGTCGATGTTGCTAACCCTAGCA CTCCAGCCTACACTTTTACAACAGTGAAACCTGGCGCAGCATTTAGTGTGTTAGCATGCTATAATGGTCGTCCGACTGGT ACATTCACTGTTGTAATGCGCCCTAACTACACAATTAAGGGTTCCTTTCTGTGTGGTTCTTGTGGTAGTGTTGGTTACAC CAAGGAGGGTAGTGTGATCAATTTCTGTTACATGCATCAAATGGAACTTGCTAATGGTACACATACCGGTTCAGCATTTG ATGGTACTATGTATGGTGCCTTTATGGATAAACAAGTGCACCAAGTTCAGTTAACAGACAAATACTGCAGTGTTAATGTA GTAGCTTGGCTTTACGCAGCAATACTTAATGGTTGCGCTTGGTTTGTAAAACCTAATCGCACTAGTGTTGTTTCTTTTAA TGAATGGGCTCTTGCCAACCAATTCACTGAATTTGTTGGCACTCAATCCGTTGACATGTTAGCTGTCAAAACAGGCGTTG CTATTGAACAGCTGCTTTATGCGATCCAACAACTGTATACTGGGTTCCAGGGAAAGCAAATCCTTGGCAGTACCATGTTG GAAGATGAATTCACACCTGAGGATGTTAATATGCAGATTATGGGTGTGGTTATGCAGAGTGGTGTGAGAAAAGTTACATA TGGTACTGCGCATTGGTTGTTTGCGACCCTTGTCTCAACCTATGTGATAATCTTACAAGCCACTAAATTTACTTTGTGGA ACTACTTGTTTGAGACTATTCCCACACAGTTGTTCCCACTCTTATTTGTGACTATGGCCTTCGTTATGTTGTTGGTTAAA CACAAACACACCTTTTTGACACTTTTCTTGTTGCCTGTGGCTATTTGTTTGACTTATGCAAACATAGTCTACGAGCCCAC TACTCCCATTTCGTCAGCGCTGATTGCAGTTGCAAATTGGCTTGCCCCCACTAATGCTTATATGCGCACTACACATACTG ATATTGGTGTCTACATTAGTATGTCACTTGTATTAGTCATTGTAGTGAAGAGATTGTACAACCCATCACTTTCTAACTTT GCGTTAGCATTGTGCAGTGGTGTAATGTGGTTGTACACTTATAGCATTGGAGAAGCCTCAAGCCCCATTGCCTATCTGGT TTTTGTCACTACACTCACTAGTGATTATACGATTACAGTCTTTGTTACTGTCAACCTTGCAAAAGTTTGCACTTATGCCA TCTTTGCTTACTCACCACAGCTTACACTTGTGTTTCCGGAAGTGAAGATGATACTTTTATTATACACATGTTTAGGTTTC ATGTGTACTTGCTATTTTGGTGTCTTCTCTCTTTTGAACCTTAAGCTTAGAGCACCTATGGGTGTCTATGACTTTAAGGT CTCAACACAAGAGTTCAGATTCATGACTGCTAACAATCTAACTGCACCTAGAAATTCTTGGGAGGCTATGGCTCTGAACT TTAAGTTAATAGGTATTGGCGGTACACCTTGTATAAAGGTTGCTGCTATGCAGTCTAAACTTACAGATCTTAAATGCACA TCTGTGGTTCTCCTCTCTGTGCTCCAACAGTTACACTTAGAGGCTAATAGTAGGGCCTGGGCTTTCTGTGTTAAATGCCA TAATGATATATTGGCAGCAACAGACCCCAGTGAGGCTTTCGAGAAATTCGTAAGTCTCTTTGCTACTTTAATGACTTTTT CTGGTAATGTAGATCTTGATGCGTTAGCTAGTGATATTTTTGACACTCCTAGCGTACTTCAAGCTACTCTTTCTGAGTTT TCACACTTAGCTACCTTTGCTGAGTTGGAAGCTGCGCAGAAAGCCTATCAGGAAGCTATGGACTCTGGTGACACCTCACC ACAAGTTCTTAAGGCTTTGCAGAAGGCTGTTAATATAGCTAAAAACGCCTATGAGAAGGATAAGGCAGTGGCCCGTAAGT TAGAACGTATGGCTGATCAGGCTATGACTTCTATGTATAAGCAAGCACGTGCTGAAGACAAGAAAGCAAAAATTGTCAGT GCTATGCAAACTATGTTGTTTGGTATGATTAAGAAGCTCGACAACGATGTTCTTAATGGTATCATTTCTAACGCTAGGAA TGGTTGTATACCTCTTAGTGTCATCCCACTGTGTGCTTCAAATAAACTTCGCGTTGTAATTCCTGACTTCACCGTCTGGA ATCAGGTAGTCACATATCCCTCGCTTAACTACGCTGGGGCTTTGTGGGACATTACAGTTATAAACAATGTGGACAATGAA ATTGTTAAGTCTTCAGATGTTGTAGACAGCAATGAAAATTTAACATGGCCACTTGTTTTAGAATGCACTAGGGCATCCAC TTCTGCCGTTAAGTTGCAAAATAATGAGATCAAACCTTCAGGTCTAAAAACCATGGTTGTGTCTGCGGGTCAAGAGCAAA CTAACTGTAATACTAGTTCCTTAGCTTATTACGAACCTGTGCAGGGTCGTAAAATGCTGATGGCTCTTCTTTCTGATAAT GCCTATCTCAAATGGGCGCGTGTTGAAGGTAAGGACGGATTTGTCAGTGTAGAGCTACAACCTCCTTGCAAATTCTTGAT TGCGGGACCAAAAGGACCTGAAATCCGATATCTCTATTTTGTTAAAAATCTTAACAACCTTCATCGCGGGCAAGTGTTAG GGCACATTGCTGCGACTGTTAGATTGCAAGCTGGTTCTAACACCGAGTTTGCCTCTAATTCCTCGGTGTTGTCACTTGTT AACTTCACCGTTGATCCTCAAAAAGCTTATCTCGATTTCGTCAATGCGGGAGGTGCCCCATTGACAAATTGTGTTAAGAT GCTTACTCCTAAAACTGGTACAGGTATAGCTATATCTGTTAAACCAGAGAGTACAGCTGATCAAGAGACTTATGGTGGAG CTTCAGTGTGTCTCTATTGCCGTGCGCATATAGAACATCCTGATGTCTCTGGTGTTTGTAAATATAAGGGTAAGTTTGTC CAAATCCCTGCTCAGTGTGTCCGTGACCCTGTGGGATTTTGTTTGTCAAATACCCCCTGTAATGTCTGTCAATATTGGAT TGGATATGGGTGCAATTGTGACTCGCTTAGGCAAGCAGCACTGCCCCAATCTAAAGATTCCAATTTTTTAAACGAGTCCG GGGTTCTATTGTAAATGCCCGAATAGAACCCTGTTCAAGTGGTTTGTCCACTGATGTCGTCTTTAGGGCATTTGACATCT GCAACTATAAGGCTAAGGTTGCTGGTATTGGAAAATACTACAAGACTAATACTTGTAGGTTTGTAGAATTAGATGACCAA GGGCATCATTTAGACTCCTATTTTGTCGTTAAGAGGCATACTATGGAGAATTATGAACTAGAGAAGCACTGTTACGACTT GTTACGTGACTGTGATGCTGTAGCTCCCCATGATTTCTTCATCTTTGATGTAGACAAAGTTAAAACACCTCATATTGTAC GTCAGCGTTTAACTGAGTACACTATGATGGATCTTGTATATGCCCTGAGGCACTTTGATCAAAATAGCGAAGTGCTTAAG GCTATCTTAGTGAAGTATGGTTGCTGTGATGTTACCTACTTTGAAAATAAACTCTGGTTTGATTTTGTTGAAAATCCCAG TGTTATTGGTGTTTATCATAAACTTGGAGAACGTGTACGCCAAGCTATCTTAAACACTGTTAAATTTTGTGACCACATGG TCAAGGCTGGTTTAGTCGGTGTGCTCACACTAGACAACCAGGACCTTAATGGCAAGTGGTATGATTTTGGTGACTTCGTA ATCACTCAACCTGGTTCAGGAGTAGCTATAGTTGATAGCTACTATTCTTATTTGATGCCTGTGCTCTCAATGACCGATTG TCTGGCCGCTGAGACACATAGGGATTGTGATTTTAATAAACCACTCATTGAGTGGCCACTTACTGAGTATGATTTTACTG ATTATAAGGTACAACTCTTTGAGAAGTACTTTAAATATTGGGATCAGACGTATCACGCAAATTGCGTTAATTGTACTGAT GACCGTTGTGTGTTACATTGTGCTAATTTCAATGTATTGTTTGCTATGACCATGCCTAAGACTTGTTTCGGACCCATAGT CCGAAAGATCTTTGTTGATGGCGTGCCATTTGTAGTATCTTGTGGTTATCACTACAAAGAATTAGGTTTAGTCATGAATA TGGATGTTAGTCTCCATAGACATAGGCTCTCTCTTAAGGAGTTGATGATGTATGCCGCTGATCCAGCCATGCACATTGCC TCCTCTAACGCTTTTCTTGATTTGAGGACATCATGTTTTAGTGTCGCTGCACTTACAACTGGTTTGACTTTTCAAACTGT GCGGCCTGGCAATTTTAACCAAGACTTCTATGATTTCGTGGTATCTAAAGGTTTCTTTAAGGAGGGCTCTTCAGTGACGC TCAAACATTTTTTCTTTGCTCAAGATGGTAATGCTGCTATTACAGATTATAATTACTATTCTTATAATCTGCCTACTATG TGTGACATCAAACAAATGTTGTTCTGCATGGAAGTTGTAAACAAGTACTTCGAAATCTATGACGGTGGTTGTCTTAATGC TTCTGAAGTGGTTGTTAATAATTTAGACAAGAGTGCTGGCCATCCTTTTAATAAGTTTGGCAAAGCTCGTGTCTATTATG AGAGCATGTCTTACCAGGAGCAAGATGAACTTTTTGCCATGACAAAGCGTAACGTCATTCCTACCATGACTCAAATGAAT CTAAAATATGCTATTAGTGCTAAGAATAGAGCTCGCACTGTTGCAGGCGTGTCCATACTTAGCACAATGACTAATCGCCA GTACCATCAGAAAATGCTTAAGTCCATGGCTGCAACTCGTGGAGCGACTTGCGTCATTGGTACTACAAAGTTCTACGGTG GCTGGGATTTCATGCTTAAAACATTGTACAAAGATGTTGATAATCCGCATCTTATGGGTTGGGATTACCCTAAGTGTGAT AGAGCTATGCCTAATATGTGTAGAATCTTCGCTTCACTCATATTAGCTCGTAAACATGGCACTTGTTGTACTACAAGGGA CAGATTTTATCGCTTGGCAAATGAGTGTGCTCAGGTGCTAAGCGAATATGTTCTATGTGGTGGTGGTTACTACGTCAAAC CTGGAGGTACCAGTAGCGGAGATGCCACCACTGCATATGCCAATAGTGTCTTTAACATTTTGCAGGCGACAACTGCTAAT GTCAGTGCACTTATGGGTGCTAATGGCAACAAGATTGTTGACAAAGAAGTTAAAGACATGCAGTTTGATTTGTATGTCAA TGTTTACAGGAGCACTAGCCCAGACCCCAAATTTGTTGATAAATACTATGCTTTTCTTAATAAGCACTTTTCTATGATGA TACTGTCTGATGACGGTGTCGTTTGCTATAATAGTGATTATGCAGCTAAGGGTTACATTGCTGGAATACAGAATTTTAAG GAAACGCTGTATTATCAGAACAATGTCTTTATGTCTGAAGCTAAATGCTGGGTGGAAACCGATCTGAAGAAAGGGCCACA TGAATTCTGTTCACAGCATACGCTTTATATTAAGGATGGCGACGATGGTTACTTCCTTCCTTATCCAGACCCTTCAAGAA TTTTGTCTGCCGGTTGCTTTGTAGATGATATCGTTAAGACTGACGGTACACTCATGGTAGAGCGGTTTGTGTCTTTGGCT ATAGATGCTTACCCTCTCACAAAGCATGAAGATATAGAATACCAGAATGTATTCTGGGTCTACTTACAGTATATAGAAAA ACTGTATAAAGACCTTACAGGACACATGCTTGACAGTTATTCTGTCATGCTATGTGGTGATAATTCTGCTAAGTTTTGGG AAGAGGCATTCTATAGAGATCTCTATAGTTCGCCTACCACTTTGCAGGCTGTCGGTTCATGCGTTGTATGCCATTCACAG ACTTCCCTACGCTGTGGGACATGCATCCGTAGACCATTTCTCTGCTGTAAATGCTGCTATGATCATGTTATAGCAACTCC ACATAAGATGGTTTTGTCTGTTTCTCCTTACGTTTGTAATGCCCCTGGTTGTGGCGTTTCAGACGTTACTAAGCTATATT TAGGTGGTATGAGCTACTTTTGTGTAGATCATAGACCTGTGTGTAGTTTTCCACTTTGCGCTAATGGTCTTGTATTCGGC TTATACAAGAATATGTGCACAGGTAGTCCTTCTATAGTTGAATTTAATAGGTTGGCTACCTGTGACTGGACTGAAAGTGG TGATTACACCCTTGCCAATACTACAACAGAACCACTCAAACTTTTTGCTGCTGAGACTTTACGTGCCACTGAAGAGGCGT CTAAGCAGTCTTATGCTATTGCCACCATCAAAGAAATTGTTGGTGAGCGCCAACTATTACTTGTGTGGGAGGCTGGCAAG TCCAAACCACCACTCAATCGTAATTATGTTTTTACTGGTTATCATATAACCAAAAATAGTAAAGTGCAGCTCGGTGAGTA CATTTTCGAGCGCATTGATTATAGTGATGCTGTATCCTACAAGTCTAGTACAACGTATAAACTGACTGTAGGTGACATCT TCGTACTTACCTCTCACTCTGTGGCTACCTTGACGGCGCCCACAATTGTGAATCAAGAGAGGTATGTTAAAATTACTGGG TTGTACCCAACCATTACGGTACCTGAAGAGTTCGCAAGTCATGTTGCCAACTTCCAAAAATCAGGTTATAGTAAATATGT CACTGTTCAGGGACCACCTGGCACTGGCAAAAGTCATTTTGCTATAGGGTTAGCGATTTACTACCCTACAGCACGTGTTG TTTATACAGCATGTTCACACGCAGCTGTTGATGCTTTGTGTGAAAAAGCTTTTAAATATTTGAACATTGCTAAATGTTCC CGTATCATTCCTGCAAAGGCACGTGTTGAGTGCTATGACAGGTTTAAAGTTAATGAGACAAATTCTCAATATTTGTTTAG TACTATTAATGCTCTACCAGAAACTTCTGCCGATATTCTGGTGGTTGATGAGGTTAGTATGTGCACTAATTATGATCTTT CAATTATTAATGCACGTATTAAAGCTAAGCACATTGTCTATGTAGGAGATCCAGCACAGTTGCCAGCTCCTAGGACTTTG TTGACTAGAGGCACATTGGAACCAGAAAATTTCAATAGTGTCACTAGATTGATGTGTAACTTAGGTCCTGACATATTTTT AAGTATGTGCTACAGGTGTCCTAAGGAAATAGTAAGCACTGTGAGCGCTCTTGTCTACAATAATAAATTGTTAGCCAAGA AGGAGCTTTCAGGCCAGTGCTTTAAAATACTCTATAAGGGCAATGTGACGCATGATGCTAGCTCTGCCATTAATAGACCA CAACTCACATTTGTGAAGAATTTTATTACTGCCAATCCGGCATGGAGTAAGGCAGTCTTTATTTCGCCTTACAATTCACA GAATGCTGTGTCTCGTTCAATGCTGGGTCTTACCACTCAGACTGTTGATTCCTCACAGGGTTCAGAATACCAGTACGTTA TCTTCTGTCAAACAGCAGATACGGCACATGCTAACAACATTAACAGATTTAATGTTGCAATCACTCGTGCCCAAAAAGGT ATTCTTTGTGTTATGACATCTCAGGCACTCTTTGAGTCCTTAGAGTTTACTGAATTGTCTTTTACTAATTACAAGCTCCA GTCTCAGATTGTAACTGGCCTTTTTAAAGATTGCTCTAGAGAAACTTCTGGCCTCTCACCTGCTTATGCACCAACATATG TTAGTGTTGATGACAAGTATAAGACGAGTGATGAGCTTTGCGTGAATCTTAATTTACCCGCAAATGTCCCATACTCTCGT GTTATTTCCAGGATGGGCTTTAAACTCGATGCAACAGTTCCTGGATATCCTAAGCTTTTCATTACTCGTGAAGAGGCTGT AAGGCAAGTTCGAAGCTGGATAGGCTTCGATGTTGAGGGTGCTCATGCTTCCCGTAATGCATGTGGCACCAATGTGCCTC TACAATTAGGATTTTCAACTGGTGTGAACTTTGTTGTTCAGCCAGTTGGTGTTGTAGACACTGAGTGGGGTAACATGTTA ACGGGCATTGCTGCACGTCCTCCACCAGGTGAACAGTTTAAGCACCTCGTGCCTCTTATGCATAAGGGGGCTGCGTGGCC TATTGTTAGACGACGTATAGTGCAAATGTTGTCAGACACTTTAGACAAATTGTCTGATTACTGTACGTTTGTTTGTTGGG CTCATGGCTTTGAATTAACGTCTGCATCATACTTTTGCAAGATAGGTAAGGAACAGAAGTGTTGCATGTGCAATAGACGC GCTGCAGCGTACTCTTCACCTCTGCAATCTTATGCCTGCTGGACTCATTCCTGCGGTTATGATTATGTCTACAACCCTTT CTTTGTCGATGTTCAACAGTGGGGTTATGTAGGCAATCTTGCTACTAATCACGATCGTTATTGCTCTGTCCATCAAGGAG CTCATGTGGCTTCTAATGATGCAATAATGACTCGTTGTTTAGCTATTCATTCTTGTTTTATAGAACGTGTGGATTGGGAT ATAGAGTATCCTTATATCTCACATGAAAAGAAATTGAATTCCTGTTGTAGAATCGTTGAGCGCAACGTCGTACGTGCTGC TCTTCTTGCCGGTTCATTTGACAAAGTCTATGATATTGGCAATCCTAAAGGAATTCCTATTGTTGATGACCCTGTGGTTG ATTGGCATTATTTTGATGCACAGCCCTTGACCAGGAAGGTACAACAGCTTTTCTATACAGAGGACATGGCCTCAAGATTT GCTGATGGGCTCTGCTTATTTTGGAACTGTAATGTACCAAAATATCCTAATAATGCAATTGTATGCAGGTTTGACACACG TGTGCATTCTGAGTTCAATTTGCCAGGTTGTGATGGCGGTAGTTTGTATGTTAACAAGCACGCTTTTCATACACCAGCAT ATGATGTGAGTGCATTCCGTGATCTGAAACCTTTACCATTCTTTTATTATTCTACTACACCATGTGAAGTGCATGGTAAT GGTAGTATGATAGAGGATATTGATTATGTACCCCTAAAATCTGCAGTCTGTATTACAGCTTGTAATTTAGGGGGCGCTGT TTGTAGGAAGCATGCTACAGAGTACAGAGAGTATATGGAAGCATATAATCTTGTCTCTGCATCAGGTTTCCGCCTTTGGT GTTATAAGACCTTTGATATTTATAATCTCTGGTCTACTTTTACAAAAGTTCAAGGTTTGGAAAACATTGCTTTTAATGTT GTTAAACAAGGCCATTTTATTGGTGTTGAGGGTGAACTACCTGTAGCTGTAGTCAATGATAAGATCTTCACCAAGAGTGG CGTTAATGACATTTGTATGTTTGAGAATAAAACCACTTTGCCTACTAATATAGCTTTTGAACTCTATGCTAAGCGTGCTG TACGCTCGCATCCCGATTTCAAATTGCTACACAATTTACAAGCAGACATTTGCTACAAGTTCGTCCTTTGGGATTATGAA CGTAGCAATATTTATGGTACTGCTACTATTGGTGTATGTAAGTACACTGATATTGATGTTAATTCAGCTTTGAATATATG TTTTGACATACGCGATAATTGTTCATTGGAGAAGTTCATGTCTACTCCCAATGCCATCTTTATTTCTGATAGAAAAATCA AGAAATACCCTTGTATGGTAGGTCCTGATTATGCTTACTTCAATGGTGCTATCATCCGTGATAGTGATGTTGTTAAACAA CCAGTGAAGTTCTACTTGTATAAGAAAGTCAATAATGAGTTTATTGATCCTACTGAGTGTATTTACACTCAGAGTCGCTC TTGTAGTGACTTCCTACCCCTTTCTGACATGGAGAAAGACTTTCTATCTTTTGATAGTGATGTTTTCATTAAGAAGTATG GCTTGGAAAACTATGCTTTTGAGCACGTAGTCTATGGAGACTTCTCTCATACTACGTTAGGCGGTCTTCACTTGCTTATT GGTTTATACAAGAAGCAACAGGAAGGTCATATTATTATGGAAGAAATGCTAAAAGGTAGCTCAACTATTCATAACTATTT TATTACTGAGACTAACACAGCGGCTTTTAAGGCGGTGTGTTCTGTTATAGATTTAAAGCTTGACGACTTTGTTATGATTT TAAAGAGTCAAGACCTTGGCGTAGTATCCAAGGTTGTCAAGGTTCCTATTGACTTAACAATGATTGAGTTTATGTTATGG TGTAAGGATGGACAGGTTCAAACCTTCTACCCTCGACTCCAGGCTTCTGCAGATTGGAAACCTGGTCATGCAATGCCATC CCTCTTTAAAGTTCAAAATGTAAACCTTGAACGTTGTGAGCTTGCTAATTACAAGCAATCTATTCCTATGCCTCGCGGTG TGCACATGAACATCGCTAAATATATGCAATTGTGCCAGTATTTAAATACTTGCACATTAGCCGTGCCTGCCAATATGCGT GTTATACATTTTGGCGCTGGTTCTGATAAAGGTATCGCTCCTGGTACCTCAGTTTTACGACAGTGGCTTCCTACAGATGC CATTATTATAGATAATGATTTAAATGAGTTCGTGTCAGATGCTGACATAACTTTATTTGGAGATTGTGTAACTGTACGTG TCGGCCAACAAGTGGATCTTGTTATTTCCGACATGTATGATCCTACTACTAAGAATGTAACAGGTAGTAATGAGTCAAAG GCTTTATTCTTTACTTACCTGTGTAACCTCATTAATAATAATCTTGCTCTTGGTGGGTCTGTTGCTATTAAAATAACAGA ACACTCTTGGAGCGTTGAACTTTATGAACTTATGGGAAAATTTGCTTGGTGGACTGTTTTCTGCACCAATGCAAATGCAT CCTCATCTGAAGGATTCCTCTTAGGTATTAATTACTTGGGTACTATTAAAGAAAATATAGATGGTGGTGCTATGCACGCC AACTATATATTTTGGAGAAATTCCACTCCTATGAATCTGAGTACTTACTCACTTTTTGATTTATCCAAGTTTCAATTAAA ATTAAAAGGAACACCAGTTCTTCAATTAAAGGAGAGTCAAATTAACGAACTCGTAATATCTCTCCTGTCGCAGGGTAAGT TACTTATCCGTGACAATGATACACTCAGTGTTTCTACTGATGTTCTTGTTAACACCTACAGAAAGTTACGTTGATGTAGG GCCAGATTCTGTTAAGTCTGCTTGTATTGAGGTTGATATACAACAGACTTTCTTTGATAAAACTTGGCCTAGGCCAATTG ATGTTTCTAAGGCTGACGGTATTATATACCCTCAAGGCCGTACATATTCTAACATAACTATCACTTATCAAGGTCTTTTT CCCTATCAGGGAGACCATGGTGATATGTATGTTTACTCTGCAGGACATGCTACAGGCACAACTCCACAAAAGTTGTTTGT AGCTAACTATTCTCAGGACGTCAAACAGTTTGCTAATGGGTTTGTCGTCCGTATAGGAGCAGCTGCCAATTCCACTGGCA CTGTTATTATTAGCCCATCTACCAGCGCTACTATACGAAAAATTTACCCTGCTTTTATGCTGGGTTCTTCAGTTGGTAAT TTCTCAGATGGTAAAATGGGCCGCTTCTTCAATCATACTCTAGTTCTTTTGCCCGATGGATGTGGCACTTTACTTAGAGC TTTTTATTGTATTCTAGAGCCTCGCTCTGGAAATCATTGTCCTGCTGGCAATTCCTATACTTCTTTTGCCACTTATCACA CTCCTGCAACAGATTGTTCTGATGGCAATTACAATCGTAATGCCAGTCTGAACTCTTTTAAGGAGTATTTTAATTTACGT AACTGCACCTTTATGTACACTTATAACATTACCGAAGATGAGATTTTAGAGTGGTTTGGCATTACACAAACTGCTCAAGG TGTTCACCTCTTCTCATCTCGGTATGTTGATTTGTACGGCGGCAATATGTTTCAATTTGCCACCTTGCCTGTTTATGATA CTATTAAGTATTATTCTATCATTCCTCACAGTATTCGTTCTATCCAAAGTGATAGAAAAGCTTGGGCTGCCTTCTACGTA TATAAACTTCAACCGTTAACTTTCCTGTTGGATTTTTCTGTTGATGGTTATATACGCAGAGCTATAGACTGTGGTTTTAA TGATTTGTCACAACTCCACTGCTCATATGAATCCTTCGATGTTGAATCTGGAGTTTATTCAGTTTCGTCTTTCGAAGCAA AACCTTCTGGCTCAGTTGTGGAACAGGCTGAAGGTGTTGAATGTGATTTTTCACCTCTTCTGTCTGGCACACCTCCTCAG GTTTATAATTTCAAGCGTTTGGTTTTTACCAATTGCAATTATAATCTTACCAAATTGCTTTCACTTTTTTCTGTGAATGA TTTTACTTGTAGTCAAATATCTCCAGCAGCAATTGCTAGCAACTGTTATTCTTCACTGATTTTGGATTACTTTTCATACC CACTTAGTATGAAATCCGATCTCAGTGTTAGTTCTGCTGGTCCAATATCCCAGTTTAATTATAAACAGTCCTTTTCTAAT CCCACATGTTTGATTTTAGCGACTGTTCCTCATAACCTTACTACTATTACTAAGCCTCTTAAGTACAGCTATATTAACAA GTGCTCTCGTCTTCTTTCTGATGATCGTACTGAAGTACCTCAGTTAGTGAACGCTAATCAATACTCACCCTGTGTATCCA TTGTCCCATCCACTGTGTGGGAAGACGGTGATTATTATAGGAAACAACTATCTCCACTTGAAGGTGGTGGCTGGCTTGTT GCTAGTGGCTCAACTGTTGCCATGACTGAGCAATTACAGATGGGCTTTGGTATTACAGTTCAATATGGTACAGACACCAA TAGTGTTTGCCCCAAGCTTGAATTTGCTAATGACACAAAAATTGCCTCTCAATTAGGCAATTGCGTGGAATATTCCCTCT ATGGTGTTTCGGGCCGTGGTGTTTTTCAGAATTGCACAGCTGTAGGTGTTCGACAGCAGCGCTTTGTTTATGATGCGTAC CAGAATTTAGTTGGCTATTATTCTGATGATGGCAACTACTACTGTTTGCGTGCTTGTGTTAGTGTTCCTGTTTCTGTCAT CTATGATAAAGAAACTAAAACCCACGCTACTCTATTTGGTAGTGTTGCATGTGAACACATTTCTTCTACCATGTCTCAAT ACTCCCGTTCTACGCGATCAATGCTTAAACGGCGAGATTCTACATATGGCCCCCTTCAGACACCTGTTGGTTGTGTCCTA GGACTTGTTAATTCCTCTTTGTTCGTAGAGGACTGCAAGTTGCCTCTTGGTCAATCTCTCTGTGCTCTTCCTGACACACC TAGTACTCTCACACCTCGCAGTGTGCGCTCTGTTCCAGGTGAAATGCGCTTGGCATCCATTGCTTTTAATCATCCTATTC AGGTTGATCAACTTAATAGTAGTTATTTTAAATTAAGTATACCCACTAATTTTTCCTTTGGTGTGACTCAGGAGTACATT CAGACAACCATTCAGAAAGTTACTGTTGATTGTAAACAGTACGTTTGCAATGGTTTCCAGAAGTGTGAGCAATTACTGCG CGAGTATGGCCAGTTTTGTTCCAAAATAAACCAGGCTCTCCATGGTGCCAATTTACGCCAGGATGATTCTGTACGTAATT TGTTTGCGAGCGTGAAAAGCTCTCAATCATCTCCTATCATACCAGGTTTTGGAGGTGACTTTAATTTGACACTTCTAGAA CCTGTTTCTATATCTACTGGCAGTCGTAGTGCACGTAGTGCTATTGAGGATTTGCTATTTGACAAAGTCACTATAGCTGA TCCTGGTTATATGCAAGGTTACGATGATTGCATGCAGCAAGGTCCAGCATCAGCTCGTGATCTTATTTGTGCTCAATATG TGGCTGGTTACAAAGTATTACCTCCTCTTATGGATGTTAATATGGAAGCCGCGTATACTTCATCTTTGCTTGGCAGCATA GCAGGTGTTGGCTGGACTGCTGGCTTATCCTCCTTTGCTGCTATTCCATTTGCACAGAGTATCTTTTATAGGTTAAACGG TGTTGGCATTACTCAACAGGTTCTTTCAGAGAACCAAAAGCTTATTGCCAATAAGTTTAATCAGGCTCTGGGAGCTATGC AAACAGGCTTCACTACAACTAATGAAGCTTTTCAGAAGGTTCAGGATGCTGTGAACAACAATGCACAGGCTCTATCCAAA TTAGCTAGCGAGCTATCTAATACTTTTGGTGCTATTTCCGCCTCTATTGGAGACATCATACAACGTCTTGATGTTCTCGA ACAGGACGCCCAAATAGACAGACTTATTAATGGCCGTTTGACAACACTAAATGCTTTTGTTGCACAGCAGCTTGTTCGTT CCGAATCAGCTGCTCTTTCCGCTCAATTGGCTAAAGATAAAGTCAATGAGTGTGTCAAGGCACAATCCAAGCGTTCTGGA TTTTGCGGTCAAGGCACACATATAGTGTCCTTTGTTGTAAATGCCCCTAATGGCCTTTACTTCATGCATGTTGGTTATTA CCCTAGCAACCACATTGAGGTTGTTTCTGCTTATGGTCTTTGCGATGCAGCTAACCCTACTAATTGTATAGCCCCTGTTA ATGGCTACTTTATTAAAACTAATAACACTAGGATTGTTGATGAGTGGTCATATACTGGCTCGTCCTTCTATGCACCTGAG CCCATTACCTCCCTTAATACTAAGTATGTTGCACCACAGGTGACATACCAAAACATTTCTACTAACCTCCCTCCTCCTCT TCTCGGCAATTCCACCGGGATTGACTTCCAAGATGAGTTGGATGAGTTTTTCAAAAATGTTAGCACCAGTATACCTAATT TTGGTTCCCTAACACAGATTAATACTACATTACTCGATCTTACCTACGAGATGTTGTCTCTTCAACAAGTTGTTAAAGCC CTTAATGAGTCTTACATAGACCTTAAAGAGCTTGGCAATTATACTTATTACAACAAATGGCCGTGGTACATTTGGCTTGG TTTCATTGCTGGGCTTGTTGCCTTAGCTCTATGCGTCTTCTTCATACTGTGCTGCACTGGTTGTGGCACAAACTGTATGG GAAAACTTAAGTGTAATCGTTGTTGTGATAGATACGAGGAATACGACCTCGAGCCGCATAAGGTTCATGTTCACTAATTA ACGAACTATTAATGAGAGTTCAAAGACCACCCACTCTCTTGTTAGTGTTTTCACTCTCTCTTTTGGTCACTGCATCCTCA AAACCTCTCTATGTACCTGAGCATTGTCAGAATTATTCTGGTTGCATGCTTAGGGCTTGTATTAAAACTGCCCAAGCTGA TACAGCTGGTCTTTATACAAATTTTCGAATTGACGTCCCATCTGCAGAATCAACTGGTACTCAATCAGTTTCTGTCGATC TTGAGTCAACTTCAACTCATGATGGTCCTACCGAACATGTTACTAGTGTGAATCTTTTTGACGTTGGTTACTCAGTTAAT TAACGAACTCTATGGATTACGTGTCTCTGCTTAATCAAATTTGGCAGAAGTACCTTAACTCACCGTATACTACTTGTTTG TACATCCCTAAACCCACAGCTAAGTATACACCTTTAGTTGGCACTTCATTGCACCCTGTGCTGTGGAACTGTCAGCTATC CTTTGCTGGTTATACTGAATCTGCTGTTAATTCTACAAAAGCTTTGGCCAAACAGGACGCAGCTCAGCGAATCGCTTGGT TGCTACATAAGGATGGAGGAATCCCTGATGGATGTTCCCTCTACCTCCGGCACTCAAGTTTATTCGCGCAAAGCGAGGAA GAGGAGCCATTCTCCAACTAAGAAACTGCGCTACGTTAAGCGTAGATTTTCTCTTCTGCGCCATGAAGACCTTAGTGTTA TTGTCCAACCAACACACTATGTCAGGGTTACATTTTCAGACCCCAACATGTGGTATCTACGTTCGGGTCATCATTTACAC TCAGTTCACAATTGGCTTAAACCTTATGGCGGCCAACCTGTTTCTGAGTACCATATTACTCTAGCTTTGCTAAATCTCAC TGATGAAGATTTAGCTAGAGATTTTTCACCCATTGCGCTCTTTTTGCGCAATGTCAGATTTGAGCTACATGAGTTCGCCT TGCTGCGCAAAACTCTTGTTCTTAATGCATCAGAGATCTACTGTGCTAACATACATAGATTTAAGCCTGTGTATAGAGTT AACACGGCAATCCCTACTATTAAGGATTGGCTTCTCGTTCAGGGATTTTCCCTTTACCATAGTGGCCTCCCTTTACATAT GTCAATCTCTAAATTGCATGCACTGGATGATGTTACTCGCAATTACATCATTACAATGCCATGCTTTAGAACTTACCCTC AACAAATGTTTGTTACTCCTTTGGCCGTAGATGTTGTCTCCATACGGTCTTCCAATCAGGGTAATAAACAAATTGTTCAT TCTTATCCCATTTTACATCATCCAGGATTTTAACGAACTATGGCTTTCTCGGCGTCTTTATTTAAACCCGTCCAGCTAGT CCCAGTTTCTCCTGCATTTCATCGCATTGAGTCTACTGACTCTATTGTTTTCACATACATTCCTGCTAGCGGCTATGTAG CTGCTTTAGCTGTCAATGTGTGTCTCATTCCCCTATTATTACTGCTACGTCAAGATACTTGTCGTCGCAGCATTATCAGA ACTATGGTTCTCTATTTCCTTGTTCTGTATAACTTTTTATTAGCCATTGTACTAGTCAATGGTGTACATTATCCAACTGG AAGTTGCCTGATAGCCTTCTTAGTTATCCTCATAATACTTTGGTTTGTAGATAGAATTCGTTTCTGTCTCATGCTGAATT CCTACATTCCACTGTTTGACATGCGTTCCCACTTTATTCGTGTTAGTACAGTTTCTTCTCATGGTATGGTCCCTGTAATA CACACCAAACCATTATTTATTAGAAACTTCGATCAGCGTTGCAGCTGTTCTCGTTGTTTTTATTTGCACTCTTCCACTTA TATAGAGTGCACTTATATTAGCCGTTTTAGTAAGATTAGCCTAGTTTCTGTAACTGACTTCTCCTTAAACGGCAATGTTT CCACTGTTTTCGTGCCTGCAACGCGCGATTCAGTTCCTCTTCACATAATCGCCCCGAGCTCGCTTATCGTTTAAGCAGCT CTGCGCTACTATGGGTCCCGTGTAGAGGCTAATCCATTAGTCTCTCTTTGGACATATGGAAAACGAACTATGTTACCCTT TGTCCAAGAACGAATAGGGTTGTTCATAGTAAACTTTTTCATTTTTACCGTAGTATGTGCTATAACACTCTTGGTGTGTA TGGCTTTCCTTACGGCTACTAGATTATGTGTGCAATGTATGACAGGCTTCAATACCCTGTTAGTTCAGCCCGCATTATAC TTGTATAATACTGGACGTTCAGTCTATGTAAAATTCCAGGATAGTAAACCCCCTCTACCACCTGACGAGTGGGTTTAACG AACTCCTTCATAATGTCTAATATGACGCAACTCACTGAGGCGCAGATTATTGCCATTATTAAAGACTGGAACTTTGCATG GTCCCTGATCTTTCTCTTAATTACTATCGTACTACAGTATGGATACCCATCCCGTAGTATGACTGTCTATGTCTTTAAAA TGTTTGTTTTATGGCTCCTATGGCCATCTTCCATGGCGCTATCAATATTTAGCGCCGTTTATCCAATTGATCTAGCTTCC CAGATAATCTCTGGCATTGTAGCAGCTGTTTCAGCTATGATGTGGATTTCCTACTTTGTGCAGAGTATCCGGCTGTTTAT GAGAACTGGATCATGGTGGTCATTCAATCCTGAGACTAATTGCCTTTTGAACGTTCCATTTGGTGGTACAACTGTCGTAC GTCCACTCGTAGAGGACTCTACCAGTGTAACTGCTGTTGTAACCAATGGCCACCTCAAAATGGCTGGCATGCATTTCGGT GCTTGTGACTACGACAGACTTCCTAATGAAGTCACCGTGGCCAAACCCAATGTGCTGATTGCTTTAAAAATGGTGAAGCG GCAAAGCTACGGAACTAATTCCGGCGTTGCCATTTACCATAGATATAAGGCAGGTAATTACAGGAGTCCGCCTATTACGG CGGATATTGAACTTGCATTGCTTCGAGCTTAGGCTCTTTAGTAAGAGTATCTTAATTGATTTTAACGAATCTCAATTTCA TTGTTATGGCATCCCCTGCTGCACCTCGTGCTGTTTCCTTTGCCGATAACAATGATATAACAAATACAAACCTATCTCGA GGTAGAGGACGTAATCCAAAACCACGAGCTGCACCAAATAACACTGTCTCTTGGTACACTGGGCTTACCCAACACGGGAA AGTCCCTCTTACCTTTCCACCTGGGCAGGGTGTACCTCTTAATGCCAATTCTACCCCTGCGCAAAATGCTGGGTATTGGC GGAGACAGGACAGAAAAATTAATACCGGGAATGGAATTAAGCAACTGGCTCCCAGGTGGTACTTCTACTACACTGGAACT GGACCCGAAGCAGCACTCCCATTCCGGGCTGTTAAGGATGGCATCGTTTGGGTCCATGAAGATGGCGCCACTGATGCTCC TTCAACTTTTGGGACGCGGAACCCTAACAATGATTCAGCTATTGTTACACAATTCGCGCCCGGTACTAAGCTTCCTAAAA ACTTCCACATTGAGGGGACTGGAGGCAATAGTCAATCATCTTCAAGAGCCTCTAGCTTAAGCAGAAACTCTTCCAGATCT AGTTCACAAGGTTCAAGATCAGGAAACTCTACCCGCGGCACTTCTCCAGGTCCATCTGGAATCGGAGCAGTAGGAGGTGA TCTACTTTACCTTGATCTTCTGAACAGACTACAAGCCCTTGAGTCTGGCAAAGTAAAGCAATCGCAGCCAAAAGTAATCA CTAAGAAAGATGCTGCTGCTGCTAAAAATAAGATGCGCCACAAGCGCACTTCCACCAAAAGTTTCAACATGGTGCAAGCT TTTGGTCTTCGCGGACCAGGAGACCTCCAGGGAAACTTTGGTGATCTTCAATTGAATAAACTCGGCACTGAGGACCCACG TTGGCCCCAAATTGCTGAGCTTGCTCCTACAGCCAGTGCTTTTATGGGTATGTCGCAATTTAAACTTACCCATCAGAACA ATGATGATCATGGCAACCCTGTGTACTTCCTTCGGTACAGTGGAGCCATTAAACTTGACCCAAAGAATCCCAACTACAAT AAGTGGTTGGAGCTTCTTGAGCAAAATATTGATGCCTACAAAACCTTCCCTAAGAAGGAAAAGAAACAAAAGGCACCAAA AGAAGAATCAACAGACCAAATGTCTGAACCTCCAAAGGAGCAGCGTGTGCAAGGTAGCATCACTCAGCGCACTCGCACCC GTCCAAGTGTTCAGCCTGGTCCAATGATTGATGTTAACACTGATTAGTGTCACTCAAAGTAACAAGATCGCGGCAATCGT TTGTGTTTGGCAACCCCATCTCACCATCGCTTGTCCACTCTTGCACAGAATGGAATCATGTTGTAATTACAGTGCAATAA GGTAATTATAACCCATTTAATTGATAGCTATGCTTTATTAAAGTGTGTAGCTGTAGAGAGAATGTTAAAGACTGTCACCT CTGCTTGATTGCAAGTGAACAGTGCCCCCCGGGAAGAGCTCTACAGTGTGAAATGTAAATAAAAAATAGCTATTATTCAA TTAGATTAGGCTAATTAGATGATTTGCAAAAAAAAAAAA kraken2-2.1.2/data/README.md000066400000000000000000000003611404614725400152500ustar00rootroot00000000000000This directory contains several viral genomes, a lightweight taxonomy, and a simple read simulator. The genomes and taxonomy were obtained from NCBI (with modifications to each to make a minimal filesize and prepare them for Kraken 2 use). kraken2-2.1.2/data/SARS.fa000066400000000000000000000727561404614725400150720ustar00rootroot00000000000000>kraken:taxid|694009|NC_004718.3 SARS coronavirus, complete genome ATATTAGGTTTTTACCTACCCAGGAAAAGCCAACCAACCTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAAT CTGTGTAGCTGTCGCTCGGCTGCATGCCTAGTGCACCTACGCAGTATAAACAATAATAAATTTTACTGTCGTTGACAAGA AACGAGTAACTCGTCCCTCTTCTGCAGACTGCTTACGGTTTCGTCCGTGTTGCAGTCGATCATCAGCATACCTAGGTTTC GTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTTCTTGGTGTCAACGAGAAAACACACGTCCAACTCAGTTTGCC TGTCCTTCAGGTTAGAGACGTGCTAGTGCGTGGCTTCGGGGACTCTGTGGAAGAGGCCCTATCGGAGGCACGTGAACACC TCAAAAATGGCACTTGTGGTCTAGTAGAGCTGGAAAAAGGCGTACTGCCCCAGCTTGAACAGCCCTATGTGTTCATTAAA CGTTCTGATGCCTTAAGCACCAATCACGGCCACAAGGTCGTTGAGCTGGTTGCAGAAATGGACGGCATTCAGTACGGTCG TAGCGGTATAACACTGGGAGTACTCGTGCCACATGTGGGCGAAACCCCAATTGCATACCGCAATGTTCTTCTTCGTAAGA ACGGTAATAAGGGAGCCGGTGGTCATAGCTATGGCATCGATCTAAAGTCTTATGACTTAGGTGACGAGCTTGGCACTGAT CCCATTGAAGATTATGAACAAAACTGGAACACTAAGCATGGCAGTGGTGCACTCCGTGAACTCACTCGTGAGCTCAATGG AGGTGCAGTCACTCGCTATGTCGACAACAATTTCTGTGGCCCAGATGGGTACCCTCTTGATTGCATCAAAGATTTTCTCG CACGCGCGGGCAAGTCAATGTGCACTCTTTCCGAACAACTTGATTACATCGAGTCGAAGAGAGGTGTCTACTGCTGCCGT GACCATGAGCATGAAATTGCCTGGTTCACTGAGCGCTCTGATAAGAGCTACGAGCACCAGACACCCTTCGAAATTAAGAG TGCCAAGAAATTTGACACTTTCAAAGGGGAATGCCCAAAGTTTGTGTTTCCTCTTAACTCAAAAGTCAAAGTCATTCAAC CACGTGTTGAAAAGAAAAAGACTGAGGGTTTCATGGGGCGTATACGCTCTGTGTACCCTGTTGCATCTCCACAGGAGTGT AACAATATGCACTTGTCTACCTTGATGAAATGTAATCATTGCGATGAAGTTTCATGGCAGACGTGCGACTTTCTGAAAGC CACTTGTGAACATTGTGGCACTGAAAATTTAGTTATTGAAGGACCTACTACATGTGGGTACCTACCTACTAATGCTGTAG TGAAAATGCCATGTCCTGCCTGTCAAGACCCAGAGATTGGACCTGAGCATAGTGTTGCAGATTATCACAACCACTCAAAC ATTGAAACTCGACTCCGCAAGGGAGGTAGGACTAGATGTTTTGGAGGCTGTGTGTTTGCCTATGTTGGCTGCTATAATAA GCGTGCCTACTGGGTTCCTCGTGCTAGTGCTGATATTGGCTCAGGCCATACTGGCATTACTGGTGACAATGTGGAGACCT TGAATGAGGATCTCCTTGAGATACTGAGTCGTGAACGTGTTAACATTAACATTGTTGGCGATTTTCATTTGAATGAAGAG GTTGCCATCATTTTGGCATCTTTCTCTGCTTCTACAAGTGCCTTTATTGACACTATAAAGAGTCTTGATTACAAGTCTTT CAAAACCATTGTTGAGTCCTGCGGTAACTATAAAGTTACCAAGGGAAAGCCCGTAAAAGGTGCTTGGAACATTGGACAAC AGAGATCAGTTTTAACACCACTGTGTGGTTTTCCCTCACAGGCTGCTGGTGTTATCAGATCAATTTTTGCGCGCACACTT GATGCAGCAAACCACTCAATTCCTGATTTGCAAAGAGCAGCTGTCACCATACTTGATGGTATTTCTGAACAGTCATTACG TCTTGTCGACGCCATGGTTTATACTTCAGACCTGCTCACCAACAGTGTCATTATTATGGCATATGTAACTGGTGGTCTTG TACAACAGACTTCTCAGTGGTTGTCTAATCTTTTGGGCACTACTGTTGAAAAACTCAGGCCTATCTTTGAATGGATTGAG GCGAAACTTAGTGCAGGAGTTGAATTTCTCAAGGATGCTTGGGAGATTCTCAAATTTCTCATTACAGGTGTTTTTGACAT CGTCAAGGGTCAAATACAGGTTGCTTCAGATAACATCAAGGATTGTGTAAAATGCTTCATTGATGTTGTTAACAAGGCAC TCGAAATGTGCATTGATCAAGTCACTATCGCTGGCGCAAAGTTGCGATCACTCAACTTAGGTGAAGTCTTCATCGCTCAA AGCAAGGGACTTTACCGTCAGTGTATACGTGGCAAGGAGCAGCTGCAACTACTCATGCCTCTTAAGGCACCAAAAGAAGT AACCTTTCTTGAAGGTGATTCACATGACACAGTACTTACCTCTGAGGAGGTTGTTCTCAAGAACGGTGAACTCGAAGCAC TCGAGACGCCCGTTGATAGCTTCACAAATGGAGCTATCGTTGGCACACCAGTCTGTGTAAATGGCCTCATGCTCTTAGAG ATTAAGGACAAAGAACAATACTGCGCATTGTCTCCTGGTTTACTGGCTACAAACAATGTCTTTCGCTTAAAAGGGGGTGC ACCAATTAAAGGTGTAACCTTTGGAGAAGATACTGTTTGGGAAGTTCAAGGTTACAAGAATGTGAGAATCACATTTGAGC TTGATGAACGTGTTGACAAAGTGCTTAATGAAAAGTGCTCTGTCTACACTGTTGAATCCGGTACCGAAGTTACTGAGTTT GCATGTGTTGTAGCAGAGGCTGTTGTGAAGACTTTACAACCAGTTTCTGATCTCCTTACCAACATGGGTATTGATCTTGA TGAGTGGAGTGTAGCTACATTCTACTTATTTGATGATGCTGGTGAAGAAAACTTTTCATCACGTATGTATTGTTCCTTTT ACCCTCCAGATGAGGAAGAAGAGGACGATGCAGAGTGTGAGGAAGAAGAAATTGATGAAACCTGTGAACATGAGTACGGT ACAGAGGATGATTATCAAGGTCTCCCTCTGGAATTTGGTGCCTCAGCTGAAACAGTTCGAGTTGAGGAAGAAGAAGAGGA AGACTGGCTGGATGATACTACTGAGCAATCAGAGATTGAGCCAGAACCAGAACCTACACCTGAAGAACCAGTTAATCAGT TTACTGGTTATTTAAAACTTACTGACAATGTTGCCATTAAATGTGTTGACATCGTTAAGGAGGCACAAAGTGCTAATCCT ATGGTGATTGTAAATGCTGCTAACATACACCTGAAACATGGTGGTGGTGTAGCAGGTGCACTCAACAAGGCAACCAATGG TGCCATGCAAAAGGAGAGTGATGATTACATTAAGCTAAATGGCCCTCTTACAGTAGGAGGGTCTTGTTTGCTTTCTGGAC ATAATCTTGCTAAGAAGTGTCTGCATGTTGTTGGACCTAACCTAAATGCAGGTGAGGACATCCAGCTTCTTAAGGCAGCA TATGAAAATTTCAATTCACAGGACATCTTACTTGCACCATTGTTGTCAGCAGGCATATTTGGTGCTAAACCACTTCAGTC TTTACAAGTGTGCGTGCAGACGGTTCGTACACAGGTTTATATTGCAGTCAATGACAAAGCTCTTTATGAGCAGGTTGTCA TGGATTATCTTGATAACCTGAAGCCTAGAGTGGAAGCACCTAAACAAGAGGAGCCACCAAACACAGAAGATTCCAAAACT GAGGAGAAATCTGTCGTACAGAAGCCTGTCGATGTGAAGCCAAAAATTAAGGCCTGCATTGATGAGGTTACCACAACACT GGAAGAAACTAAGTTTCTTACCAATAAGTTACTCTTGTTTGCTGATATCAATGGTAAGCTTTACCATGATTCTCAGAACA TGCTTAGAGGTGAAGATATGTCTTTCCTTGAGAAGGATGCACCTTACATGGTAGGTGATGTTATCACTAGTGGTGATATC ACTTGTGTTGTAATACCCTCCAAAAAGGCTGGTGGCACTACTGAGATGCTCTCAAGAGCTTTGAAGAAAGTGCCAGTTGA TGAGTATATAACCACGTACCCTGGACAAGGATGTGCTGGTTATACACTTGAGGAAGCTAAGACTGCTCTTAAGAAATGCA AATCTGCATTTTATGTACTACCTTCAGAAGCACCTAATGCTAAGGAAGAGATTCTAGGAACTGTATCCTGGAATTTGAGA GAAATGCTTGCTCATGCTGAAGAGACAAGAAAATTAATGCCTATATGCATGGATGTTAGAGCCATAATGGCAACCATCCA ACGTAAGTATAAAGGAATTAAAATTCAAGAGGGCATCGTTGACTATGGTGTCCGATTCTTCTTTTATACTAGTAAAGAGC CTGTAGCTTCTATTATTACGAAGCTGAACTCTCTAAATGAGCCGCTTGTCACAATGCCAATTGGTTATGTGACACATGGT TTTAATCTTGAAGAGGCTGCGCGCTGTATGCGTTCTCTTAAAGCTCCTGCCGTAGTGTCAGTATCATCACCAGATGCTGT TACTACATATAATGGATACCTCACTTCGTCATCAAAGACATCTGAGGAGCACTTTGTAGAAACAGTTTCTTTGGCTGGCT CTTACAGAGATTGGTCCTATTCAGGACAGCGTACAGAGTTAGGTGTTGAATTTCTTAAGCGTGGTGACAAAATTGTGTAC CACACTCTGGAGAGCCCCGTCGAGTTTCATCTTGACGGTGAGGTTCTTTCACTTGACAAACTAAAGAGTCTCTTATCCCT GCGGGAGGTTAAGACTATAAAAGTGTTCACAACTGTGGACAACACTAATCTCCACACACAGCTTGTGGATATGTCTATGA CATATGGACAGCAGTTTGGTCCAACATACTTGGATGGTGCTGATGTTACAAAAATTAAACCTCATGTAAATCATGAGGGT AAGACTTTCTTTGTACTACCTAGTGATGACACACTACGTAGTGAAGCTTTCGAGTACTACCATACTCTTGATGAGAGTTT TCTTGGTAGGTACATGTCTGCTTTAAACCACACAAAGAAATGGAAATTTCCTCAAGTTGGTGGTTTAACTTCAATTAAAT GGGCTGATAACAATTGTTATTTGTCTAGTGTTTTATTAGCACTTCAACAGCTTGAAGTCAAATTCAATGCACCAGCACTT CAAGAGGCTTATTATAGAGCCCGTGCTGGTGATGCTGCTAACTTTTGTGCACTCATACTCGCTTACAGTAATAAAACTGT TGGCGAGCTTGGTGATGTCAGAGAAACTATGACCCATCTTCTACAGCATGCTAATTTGGAATCTGCAAAGCGAGTTCTTA ATGTGGTGTGTAAACATTGTGGTCAGAAAACTACTACCTTAACGGGTGTAGAAGCTGTGATGTATATGGGTACTCTATCT TATGATAATCTTAAGACAGGTGTTTCCATTCCATGTGTGTGTGGTCGTGATGCTACACAATATCTAGTACAACAAGAGTC TTCTTTTGTTATGATGTCTGCACCACCTGCTGAGTATAAATTACAGCAAGGTACATTCTTATGTGCGAATGAGTACACTG GTAACTATCAGTGTGGTCATTACACTCATATAACTGCTAAGGAGACCCTCTATCGTATTGACGGAGCTCACCTTACAAAG ATGTCAGAGTACAAAGGACCAGTGACTGATGTTTTCTACAAGGAAACATCTTACACTACAACCATCAAGCCTGTGTCGTA TAAACTCGATGGAGTTACTTACACAGAGATTGAACCAAAATTGGATGGGTATTATAAAAAGGATAATGCTTACTATACAG AGCAGCCTATAGACCTTGTACCAACTCAACCATTACCAAATGCGAGTTTTGATAATTTCAAACTCACATGTTCTAACACA AAATTTGCTGATGATTTAAATCAAATGACAGGCTTCACAAAGCCAGCTTCACGAGAGCTATCTGTCACATTCTTCCCAGA CTTGAATGGCGATGTAGTGGCTATTGACTATAGACACTATTCAGCGAGTTTCAAGAAAGGTGCTAAATTACTGCATAAGC CAATTGTTTGGCACATTAACCAGGCTACAACCAAGACAACGTTCAAACCAAACACTTGGTGTTTACGTTGTCTTTGGAGT ACAAAGCCAGTAGATACTTCAAATTCATTTGAAGTTCTGGCAGTAGAAGACACACAAGGAATGGACAATCTTGCTTGTGA AAGTCAACAACCCACCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAGGAAGTCATAGAGTGTGACGTGAAAACTA CCGAAGTTGTAGGCAATGTCATACTTAAACCATCAGATGAAGGTGTTAAAGTAACACAAGAGTTAGGTCATGAGGATCTT ATGGCTGCTTATGTGGAAAACACAAGCATTACCATTAAGAAACCTAATGAGCTTTCACTAGCCTTAGGTTTAAAAACAAT TGCCACTCATGGTATTGCTGCAATTAATAGTGTTCCTTGGAGTAAAATTTTGGCTTATGTCAAACCATTCTTAGGACAAG CAGCAATTACAACATCAAATTGCGCTAAGAGATTAGCACAACGTGTGTTTAACAATTATATGCCTTATGTGTTTACATTA TTGTTCCAATTGTGTACTTTTACTAAAAGTACCAATTCTAGAATTAGAGCTTCACTACCTACAACTATTGCTAAAAATAG TGTTAAGAGTGTTGCTAAATTATGTTTGGATGCCGGCATTAATTATGTGAAGTCACCCAAATTTTCTAAATTGTTCACAA TCGCTATGTGGCTATTGTTGTTAAGTATTTGCTTAGGTTCTCTAATCTGTGTAACTGCTGCTTTTGGTGTACTCTTATCT AATTTTGGTGCTCCTTCTTATTGTAATGGCGTTAGAGAATTGTATCTTAATTCGTCTAACGTTACTACTATGGATTTCTG TGAAGGTTCTTTTCCTTGCAGCATTTGTTTAAGTGGATTAGACTCCCTTGATTCTTATCCAGCTCTTGAAACCATTCAGG TGACGATTTCATCGTACAAGCTAGACTTGACAATTTTAGGTCTGGCCGCTGAGTGGGTTTTGGCATATATGTTGTTCACA AAATTCTTTTATTTATTAGGTCTTTCAGCTATAATGCAGGTGTTCTTTGGCTATTTTGCTAGTCATTTCATCAGCAATTC TTGGCTCATGTGGTTTATCATTAGTATTGTACAAATGGCACCCGTTTCTGCAATGGTTAGGATGTACATCTTCTTTGCTT CTTTCTACTACATATGGAAGAGCTATGTTCATATCATGGATGGTTGCACCTCTTCGACTTGCATGATGTGCTATAAGCGC AATCGTGCCACACGCGTTGAGTGTACAACTATTGTTAATGGCATGAAGAGATCTTTCTATGTCTATGCAAATGGAGGCCG TGGCTTCTGCAAGACTCACAATTGGAATTGTCTCAATTGTGACACATTTTGCACTGGTAGTACATTCATTAGTGATGAAG TTGCTCGTGATTTGTCACTCCAGTTTAAAAGACCAATCAACCCTACTGACCAGTCATCGTATATTGTTGATAGTGTTGCT GTGAAAAATGGCGCGCTTCACCTCTACTTTGACAAGGCTGGTCAAAAGACCTATGAGAGACATCCGCTCTCCCATTTTGT CAATTTAGACAATTTGAGAGCTAACAACACTAAAGGTTCACTGCCTATTAATGTCATAGTTTTTGATGGCAAGTCCAAAT GCGACGAGTCTGCTTCTAAGTCTGCTTCTGTGTACTACAGTCAGCTGATGTGCCAACCTATTCTGTTGCTTGACCAAGCT CTTGTATCAGACGTTGGAGATAGTACTGAAGTTTCCGTTAAGATGTTTGATGCTTATGTCGACACCTTTTCAGCAACTTT TAGTGTTCCTATGGAAAAACTTAAGGCACTTGTTGCTACAGCTCACAGCGAGTTAGCAAAGGGTGTAGCTTTAGATGGTG TCCTTTCTACATTCGTGTCAGCTGCCCGACAAGGTGTTGTTGATACCGATGTTGACACAAAGGATGTTATTGAATGTCTC AAACTTTCACATCACTCTGACTTAGAAGTGACAGGTGACAGTTGTAACAATTTCATGCTCACCTATAATAAGGTTGAAAA CATGACGCCCAGAGATCTTGGCGCATGTATTGACTGTAATGCAAGGCATATCAATGCCCAAGTAGCAAAAAGTCACAATG TTTCACTCATCTGGAATGTAAAAGACTACATGTCTTTATCTGAACAGCTGCGTAAACAAATTCGTAGTGCTGCCAAGAAG AACAACATACCTTTTAGACTAACTTGTGCTACAACTAGACAGGTTGTCAATGTCATAACTACTAAAATCTCACTCAAGGG TGGTAAGATTGTTAGTACTTGTTTTAAACTTATGCTTAAGGCCACATTATTGTGCGTTCTTGCTGCATTGGTTTGTTATA TCGTTATGCCAGTACATACATTGTCAATCCATGATGGTTACACAAATGAAATCATTGGTTACAAAGCCATTCAGGATGGT GTCACTCGTGACATCATTTCTACTGATGATTGTTTTGCAAATAAACATGCTGGTTTTGACGCATGGTTTAGCCAGCGTGG TGGTTCATACAAAAATGACAAAAGCTGCCCTGTAGTAGCTGCTATCATTACAAGAGAGATTGGTTTCATAGTGCCTGGCT TACCGGGTACTGTGCTGAGAGCAATCAATGGTGACTTCTTGCATTTTCTACCTCGTGTTTTTAGTGCTGTTGGCAACATT TGCTACACACCTTCCAAACTCATTGAGTATAGTGATTTTGCTACCTCTGCTTGCGTTCTTGCTGCTGAGTGTACAATTTT TAAGGATGCTATGGGCAAACCTGTGCCATATTGTTATGACACTAATTTGCTAGAGGGTTCTATTTCTTATAGTGAGCTTC GTCCAGACACTCGTTATGTGCTTATGGATGGTTCCATCATACAGTTTCCTAACACTTACCTGGAGGGTTCTGTTAGAGTA GTAACAACTTTTGATGCTGAGTACTGTAGACATGGTACATGCGAAAGGTCAGAAGTAGGTATTTGCCTATCTACCAGTGG TAGATGGGTTCTTAATAATGAGCATTACAGAGCTCTATCAGGAGTTTTCTGTGGTGTTGATGCGATGAATCTCATAGCTA ACATCTTTACTCCTCTTGTGCAACCTGTGGGTGCTTTAGATGTGTCTGCTTCAGTAGTGGCTGGTGGTATTATTGCCATA TTGGTGACTTGTGCTGCCTACTACTTTATGAAATTCAGACGTGTTTTTGGTGAGTACAACCATGTTGTTGCTGCTAATGC ACTTTTGTTTTTGATGTCTTTCACTATACTCTGTCTGGTACCAGCTTACAGCTTTCTGCCGGGAGTCTACTCAGTCTTTT ACTTGTACTTGACATTCTATTTCACCAATGATGTTTCATTCTTGGCTCACCTTCAATGGTTTGCCATGTTTTCTCCTATT GTGCCTTTTTGGATAACAGCAATCTATGTATTCTGTATTTCTCTGAAGCACTGCCATTGGTTCTTTAACAACTATCTTAG GAAAAGAGTCATGTTTAATGGAGTTACATTTAGTACCTTCGAGGAGGCTGCTTTGTGTACCTTTTTGCTCAACAAGGAAA TGTACCTAAAATTGCGTAGCGAGACACTGTTGCCACTTACACAGTATAACAGGTATCTTGCTCTATATAACAAGTACAAG TATTTCAGTGGAGCCTTAGATACTACCAGCTATCGTGAAGCAGCTTGCTGCCACTTAGCAAAGGCTCTAAATGACTTTAG CAACTCAGGTGCTGATGTTCTCTACCAACCACCACAGACATCAATCACTTCTGCTGTTCTGCAGAGTGGTTTTAGGAAAA TGGCATTCCCGTCAGGCAAAGTTGAAGGGTGCATGGTACAAGTAACCTGTGGAACTACAACTCTTAATGGATTGTGGTTG GATGACACAGTATACTGTCCAAGACATGTCATTTGCACAGCAGAAGACATGCTTAATCCTAACTATGAAGATCTGCTCAT TCGCAAATCCAACCATAGCTTTCTTGTTCAGGCTGGCAATGTTCAACTTCGTGTTATTGGCCATTCTATGCAAAATTGTC TGCTTAGGCTTAAAGTTGATACTTCTAACCCTAAGACACCCAAGTATAAATTTGTCCGTATCCAACCTGGTCAAACATTT TCAGTTCTAGCATGCTACAATGGTTCACCATCTGGTGTTTATCAGTGTGCCATGAGACCTAATCATACCATTAAAGGTTC TTTCCTTAATGGATCATGTGGTAGTGTTGGTTTTAACATTGATTATGATTGCGTGTCTTTCTGCTATATGCATCATATGG AGCTTCCAACAGGAGTACACGCTGGTACTGACTTAGAAGGTAAATTCTATGGTCCATTTGTTGACAGACAAACTGCACAG GCTGCAGGTACAGACACAACCATAACATTAAATGTTTTGGCATGGCTGTATGCTGCTGTTATCAATGGTGATAGGTGGTT TCTTAATAGATTCACCACTACTTTGAATGACTTTAACCTTGTGGCAATGAAGTACAACTATGAACCTTTGACACAAGATC ATGTTGACATATTGGGACCTCTTTCTGCTCAAACAGGAATTGCCGTCTTAGATATGTGTGCTGCTTTGAAAGAGCTGCTG CAGAATGGTATGAATGGTCGTACTATCCTTGGTAGCACTATTTTAGAAGATGAGTTTACACCATTTGATGTTGTTAGACA ATGCTCTGGTGTTACCTTCCAAGGTAAGTTCAAGAAAATTGTTAAGGGCACTCATCATTGGATGCTTTTAACTTTCTTGA CATCACTATTGATTCTTGTTCAAAGTACACAGTGGTCACTGTTTTTCTTTGTTTACGAGAATGCTTTCTTGCCATTTACT CTTGGTATTATGGCAATTGCTGCATGTGCTATGCTGCTTGTTAAGCATAAGCACGCATTCTTGTGCTTGTTTCTGTTACC TTCTCTTGCAACAGTTGCTTACTTTAATATGGTCTACATGCCTGCTAGCTGGGTGATGCGTATCATGACATGGCTTGAAT TGGCTGACACTAGCTTGTCTGGTTATAGGCTTAAGGATTGTGTTATGTATGCTTCAGCTTTAGTTTTGCTTATTCTCATG ACAGCTCGCACTGTTTATGATGATGCTGCTAGACGTGTTTGGACACTGATGAATGTCATTACACTTGTTTACAAAGTCTA CTATGGTAATGCTTTAGATCAAGCTATTTCCATGTGGGCCTTAGTTATTTCTGTAACCTCTAACTATTCTGGTGTCGTTA CGACTATCATGTTTTTAGCTAGAGCTATAGTGTTTGTGTGTGTTGAGTATTACCCATTGTTATTTATTACTGGCAACACC TTACAGTGTATCATGCTTGTTTATTGTTTCTTAGGCTATTGTTGCTGCTGCTACTTTGGCCTTTTCTGTTTACTCAACCG TTACTTCAGGCTTACTCTTGGTGTTTATGACTACTTGGTCTCTACACAAGAATTTAGGTATATGAACTCCCAGGGGCTTT TGCCTCCTAAGAGTAGTATTGATGCTTTCAAGCTTAACATTAAGTTGTTGGGTATTGGAGGTAAACCATGTATCAAGGTT GCTACTGTACAGTCTAAAATGTCTGACGTAAAGTGCACATCTGTGGTACTGCTCTCGGTTCTTCAACAACTTAGAGTAGA GTCATCTTCTAAATTGTGGGCACAATGTGTACAACTCCACAATGATATTCTTCTTGCAAAAGACACAACTGAAGCTTTCG AGAAGATGGTTTCTCTTTTGTCTGTTTTGCTATCCATGCAGGGTGCTGTAGACATTAATAGGTTGTGCGAGGAAATGCTC GATAACCGTGCTACTCTTCAGGCTATTGCTTCAGAATTTAGTTCTTTACCATCATATGCCGCTTATGCCACTGCCCAGGA GGCCTATGAGCAGGCTGTAGCTAATGGTGATTCTGAAGTCGTTCTCAAAAAGTTAAAGAAATCTTTGAATGTGGCTAAAT CTGAGTTTGACCGTGATGCTGCCATGCAACGCAAGTTGGAAAAGATGGCAGATCAGGCTATGACCCAAATGTACAAACAG GCAAGATCTGAGGACAAGAGGGCAAAAGTAACTAGTGCTATGCAAACAATGCTCTTCACTATGCTTAGGAAGCTTGATAA TGATGCACTTAACAACATTATCAACAATGCGCGTGATGGTTGTGTTCCACTCAACATCATACCATTGACTACAGCAGCCA AACTCATGGTTGTTGTCCCTGATTATGGTACCTACAAGAACACTTGTGATGGTAACACCTTTACATATGCATCTGCACTC TGGGAAATCCAGCAAGTTGTTGATGCGGATAGCAAGATTGTTCAACTTAGTGAAATTAACATGGACAATTCACCAAATTT GGCTTGGCCTCTTATTGTTACAGCTCTAAGAGCCAACTCAGCTGTTAAACTACAGAATAATGAACTGAGTCCAGTAGCAC TACGACAGATGTCCTGTGCGGCTGGTACCACACAAACAGCTTGTACTGATGACAATGCACTTGCCTACTATAACAATTCG AAGGGAGGTAGGTTTGTGCTGGCATTACTATCAGACCACCAAGATCTCAAATGGGCTAGATTCCCTAAGAGTGATGGTAC AGGTACAATTTACACAGAACTGGAACCACCTTGTAGGTTTGTTACAGACACACCAAAAGGGCCTAAAGTGAAATACTTGT ACTTCATCAAAGGCTTAAACAACCTAAATAGAGGTATGGTGCTGGGCAGTTTAGCTGCTACAGTACGTCTTCAGGCTGGA AATGCTACAGAAGTACCTGCCAATTCAACTGTGCTTTCCTTCTGTGCTTTTGCAGTAGACCCTGCTAAAGCATATAAGGA TTACCTAGCAAGTGGAGGACAACCAATCACCAACTGTGTGAAGATGTTGTGTACACACACTGGTACAGGACAGGCAATTA CTGTAACACCAGAAGCTAACATGGACCAAGAGTCCTTTGGTGGTGCTTCATGTTGTCTGTATTGTAGATGCCACATTGAC CATCCAAATCCTAAAGGATTCTGTGACTTGAAAGGTAAGTACGTCCAAATACCTACCACTTGTGCTAATGACCCAGTGGG TTTTACACTTAGAAACACAGTCTGTACCGTCTGCGGAATGTGGAAAGGTTATGGCTGTAGTTGTGACCAACTCCGCGAAC CCTTGATGCAGTCTGCGGATGCATCAACGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGCA CAGGCACTAGTACTGATGTCGTCTACAGGGCTTTTGATATTTACAACGAAAAAGTTGCTGGTTTTGCAAAGTTCCTAAAA ACTAATTGCTGTCGCTTCCAGGAGAAGGATGAGGAAGGCAATTTATTAGACTCTTACTTTGTAGTTAAGAGGCATACTAT GTCTAACTACCAACATGAAGAGACTATTTATAACTTGGTTAAAGATTGTCCAGCGGTTGCTGTCCATGACTTTTTCAAGT TTAGAGTAGATGGTGACATGGTACCACATATATCACGTCAGCGTCTAACTAAATACACAATGGCTGATTTAGTCTATGCT CTACGTCATTTTGATGAGGGTAATTGTGATACATTAAAAGAAATACTCGTCACATACAATTGCTGTGATGATGATTATTT CAATAAGAAGGATTGGTATGACTTCGTAGAGAATCCTGACATCTTACGCGTATATGCTAACTTAGGTGAGCGTGTACGCC AATCATTATTAAAGACTGTACAATTCTGCGATGCTATGCGTGATGCAGGCATTGTAGGCGTACTGACATTAGATAATCAG GATCTTAATGGGAACTGGTACGATTTCGGTGATTTCGTACAAGTAGCACCAGGCTGCGGAGTTCCTATTGTGGATTCATA TTACTCATTGCTGATGCCCATCCTCACTTTGACTAGGGCATTGGCTGCTGAGTCCCATATGGATGCTGATCTCGCAAAAC CACTTATTAAGTGGGATTTGCTGAAATATGATTTTACGGAAGAGAGACTTTGTCTCTTCGACCGTTATTTTAAATATTGG GACCAGACATACCATCCCAATTGTATTAACTGTTTGGATGATAGGTGTATCCTTCATTGTGCAAACTTTAATGTGTTATT TTCTACTGTGTTTCCACCTACAAGTTTTGGACCACTAGTAAGAAAAATATTTGTAGATGGTGTTCCTTTTGTTGTTTCAA CTGGATACCATTTTCGTGAGTTAGGAGTCGTACATAATCAGGATGTAAACTTACATAGCTCGCGTCTCAGTTTCAAGGAA CTTTTAGTGTATGCTGCTGATCCAGCTATGCATGCAGCTTCTGGCAATTTATTGCTAGATAAACGCACTACATGCTTTTC AGTAGCTGCACTAACAAACAATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAATAAAGACTTTTATGACTTTGCTG TGTCTAAAGGTTTCTTTAAGGAAGGAAGTTCTGTTGAACTAAAACACTTCTTCTTTGCTCAGGATGGCAACGCTGCTATC AGTGATTATGACTATTATCGTTATAATCTGCCAACAATGTGTGATATCAGACAACTCCTATTCGTAGTTGAAGTTGTTGA TAAATACTTTGATTGTTACGATGGTGGCTGTATTAATGCCAACCAAGTAATCGTTAACAATCTGGATAAATCAGCTGGTT TCCCATTTAATAAATGGGGTAAGGCTAGACTTTATTATGACTCAATGAGTTATGAGGATCAAGATGCACTTTTCGCGTAT ACTAAGCGTAATGTCATCCCTACTATAACTCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCGT AGCTGGTGTCTCTATCTGTAGTACTATGACAAATAGACAGTTTCATCAGAAATTATTGAAGTCAATAGCCGCCACTAGAG GAGCTACTGTGGTAATTGGAACAAGCAAGTTTTACGGTGGCTGGCATAATATGTTAAAAACTGTTTACAGTGATGTAGAA ACTCCACACCTTATGGGTTGGGATTATCCAAAATGTGACAGAGCCATGCCTAACATGCTTAGGATAATGGCCTCTCTTGT TCTTGCTCGCAAACATAACACTTGCTGTAACTTATCACACCGTTTCTACAGGTTAGCTAACGAGTGTGCGCAAGTATTAA GTGAGATGGTCATGTGTGGCGGCTCACTATATGTTAAACCAGGTGGAACATCATCCGGTGATGCTACAACTGCTTATGCT AATAGTGTCTTTAACATTTGTCAAGCTGTTACAGCCAATGTAAATGCACTTCTTTCAACTGATGGTAATAAGATAGCTGA CAAGTATGTCCGCAATCTACAACACAGGCTCTATGAGTGTCTCTATAGAAATAGGGATGTTGATCATGAATTCGTGGATG AGTTTTACGCTTACCTGCGTAAACATTTCTCCATGATGATTCTTTCTGATGATGCCGTTGTGTGCTATAACAGTAACTAT GCGGCTCAAGGTTTAGTAGCTAGCATTAAGAACTTTAAGGCAGTTCTTTATTATCAAAATAATGTGTTCATGTCTGAGGC AAAATGTTGGACTGAGACTGACCTTACTAAAGGACCTCACGAATTTTGCTCACAGCATACAATGCTAGTTAAACAAGGAG ATGATTACGTGTACCTGCCTTACCCAGATCCATCAAGAATATTAGGCGCAGGCTGTTTTGTCGATGATATTGTCAAAACA GATGGTACACTTATGATTGAAAGGTTCGTGTCACTGGCTATTGATGCTTACCCACTTACAAAACATCCTAATCAGGAGTA TGCTGATGTCTTTCACTTGTATTTACAATACATTAGAAAGTTACATGATGAGCTTACTGGCCACATGTTGGACATGTATT CCGTAATGCTAACTAATGATAACACCTCACGGTACTGGGAACCTGAGTTTTATGAGGCTATGTACACACCACATACAGTC TTGCAGGCTGTAGGTGCTTGTGTATTGTGCAATTCACAGACTTCACTTCGTTGCGGTGCCTGTATTAGGAGACCATTCCT ATGTTGCAAGTGCTGCTATGACCATGTCATTTCAACATCACACAAATTAGTGTTGTCTGTTAATCCCTATGTTTGCAATG CCCCAGGTTGTGATGTCACTGATGTGACACAACTGTATCTAGGAGGTATGAGCTATTATTGCAAGTCACATAAGCCTCCC ATTAGTTTTCCATTATGTGCTAATGGTCAGGTTTTTGGTTTATACAAAAACACATGTGTAGGCAGTGACAATGTCACTGA CTTCAATGCGATAGCAACATGTGATTGGACTAATGCTGGCGATTACATACTTGCCAACACTTGTACTGAGAGACTCAAGC TTTTCGCAGCAGAAACGCTCAAAGCCACTGAGGAAACATTTAAGCTGTCATATGGTATTGCCACTGTACGCGAAGTACTC TCTGACAGAGAATTGCATCTTTCATGGGAGGTTGGAAAACCTAGACCACCATTGAACAGAAACTATGTCTTTACTGGTTA CCGTGTAACTAAAAATAGTAAAGTACAGATTGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCTGTTGTGTACA GAGGTACTACGACATACAAGTTGAATGTTGGTGATTACTTTGTGTTGACATCTCACACTGTAATGCCACTTAGTGCACCT ACTCTAGTGCCACAAGAGCACTATGTGAGAATTACTGGCTTGTACCCAACACTCAACATCTCAGATGAGTTTTCTAGCAA TGTTGCAAATTATCAAAAGGTCGGCATGCAAAAGTACTCTACACTCCAAGGACCACCTGGTACTGGTAAGAGTCATTTTG CCATCGGACTTGCTCTCTATTACCCATCTGCTCGCATAGTGTATACGGCATGCTCTCATGCAGCTGTTGATGCCCTATGT GAAAAGGCATTAAAATATTTGCCCATAGATAAATGTAGTAGAATCATACCTGCGCGTGCGCGCGTAGAGTGTTTTGATAA ATTCAAAGTGAATTCAACACTAGAACAGTATGTTTTCTGCACTGTAAATGCATTGCCAGAAACAACTGCTGACATTGTAG TCTTTGATGAAATCTCTATGGCTACTAATTATGACTTGAGTGTTGTCAATGCTAGACTTCGTGCAAAACACTACGTCTAT ATTGGCGATCCTGCTCAATTACCAGCCCCCCGCACATTGCTGACTAAAGGCACACTAGAACCAGAATATTTTAATTCAGT GTGCAGACTTATGAAAACAATAGGTCCAGACATGTTCCTTGGAACTTGTCGCCGTTGTCCTGCTGAAATTGTTGACACTG TGAGTGCTTTAGTTTATGACAATAAGCTAAAAGCACACAAGGATAAGTCAGCTCAATGCTTCAAAATGTTCTACAAAGGT GTTATTACACATGATGTTTCATCTGCAATCAACAGACCTCAAATAGGCGTTGTAAGAGAATTTCTTACACGCAATCCTGC TTGGAGAAAAGCTGTTTTTATCTCACCTTATAATTCACAGAACGCTGTAGCTTCAAAAATCTTAGGATTGCCTACGCAGA CTGTTGATTCATCACAGGGTTCTGAATATGACTATGTCATATTCACACAAACTACTGAAACAGCACACTCTTGTAATGTC AACCGCTTCAATGTGGCTATCACAAGGGCAAAAATTGGCATTTTGTGCATAATGTCTGATAGAGATCTTTATGACAAACT GCAATTTACAAGTCTAGAAATACCACGTCGCAATGTGGCTACATTACAAGCAGAAAATGTAACTGGACTTTTTAAGGACT GTAGTAAGATCATTACTGGTCTTCATCCTACACAGGCACCTACACACCTCAGCGTTGATATAAAGTTCAAGACTGAAGGA TTATGTGTTGACATACCAGGCATACCAAAGGACATGACCTACCGTAGACTCATCTCTATGATGGGTTTCAAAATGAATTA CCAAGTCAATGGTTACCCTAATATGTTTATCACCCGCGAAGAAGCTATTCGTCACGTTCGTGCGTGGATTGGCTTTGATG TAGAGGGCTGTCATGCAACTAGAGATGCTGTGGGTACTAACCTACCTCTCCAGCTAGGATTTTCTACAGGTGTTAACTTA GTAGCTGTACCGACTGGTTATGTTGACACTGAAAATAACACAGAATTCACCAGAGTTAATGCAAAACCTCCACCAGGTGA CCAGTTTAAACATCTTATACCACTCATGTATAAAGGCTTGCCCTGGAATGTAGTGCGTATTAAGATAGTACAAATGCTCA GTGATACACTGAAAGGATTGTCAGACAGAGTCGTGTTCGTCCTTTGGGCGCATGGCTTTGAGCTTACATCAATGAAGTAC TTTGTCAAGATTGGACCTGAAAGAACGTGTTGTCTGTGTGACAAACGTGCAACTTGCTTTTCTACTTCATCAGATACTTA TGCCTGCTGGAATCATTCTGTGGGTTTTGACTATGTCTATAACCCATTTATGATTGATGTTCAGCAGTGGGGCTTTACGG GTAACCTTCAGAGTAACCATGACCAACATTGCCAGGTACATGGAAATGCACATGTGGCTAGTTGTGATGCTATCATGACT AGATGTTTAGCAGTCCATGAGTGCTTTGTTAAGCGCGTTGATTGGTCTGTTGAATACCCTATTATAGGAGATGAACTGAG GGTTAATTCTGCTTGCAGAAAAGTACAACACATGGTTGTGAAGTCTGCATTGCTTGCTGATAAGTTTCCAGTTCTTCATG ACATTGGAAATCCAAAGGCTATCAAGTGTGTGCCTCAGGCTGAAGTAGAATGGAAGTTCTACGATGCTCAGCCATGTAGT GACAAAGCTTACAAAATAGAGGAACTCTTCTATTCTTATGCTACACATCACGATAAATTCACTGATGGTGTTTGTTTGTT TTGGAATTGTAACGTTGATCGTTACCCAGCCAATGCAATTGTGTGTAGGTTTGACACAAGAGTCTTGTCAAACTTGAACT TACCAGGCTGTGATGGTGGTAGTTTGTATGTGAATAAGCATGCATTCCACACTCCAGCTTTCGATAAAAGTGCATTTACT AATTTAAAGCAATTGCCTTTCTTTTACTATTCTGATAGTCCTTGTGAGTCTCATGGCAAACAAGTAGTGTCGGATATTGA TTATGTTCCACTCAAATCTGCTACGTGTATTACACGATGCAATTTAGGTGGTGCTGTTTGCAGACACCATGCAAATGAGT ACCGACAGTACTTGGATGCATATAATATGATGATTTCTGCTGGATTTAGCCTATGGATTTACAAACAATTTGATACTTAT AACCTGTGGAATACATTTACCAGGTTACAGAGTTTAGAAAATGTGGCTTATAATGTTGTTAATAAAGGACACTTTGATGG ACACGCCGGCGAAGCACCTGTTTCCATCATTAATAATGCTGTTTACACAAAGGTAGATGGTATTGATGTGGAGATCTTTG AAAATAAGACAACACTTCCTGTTAATGTTGCATTTGAGCTTTGGGCTAAGCGTAACATTAAACCAGTGCCAGAGATTAAG ATACTCAATAATTTGGGTGTTGATATCGCTGCTAATACTGTAATCTGGGACTACAAAAGAGAAGCCCCAGCACATGTATC TACAATAGGTGTCTGCACAATGACTGACATTGCCAAGAAACCTACTGAGAGTGCTTGTTCTTCACTTACTGTCTTGTTTG ATGGTAGAGTGGAAGGACAGGTAGACCTTTTTAGAAACGCCCGTAATGGTGTTTTAATAACAGAAGGTTCAGTCAAAGGT CTAACACCTTCAAAGGGACCAGCACAAGCTAGCGTCAATGGAGTCACATTAATTGGAGAATCAGTAAAAACACAGTTTAA CTACTTTAAGAAAGTAGACGGCATTATTCAACAGTTGCCTGAAACCTACTTTACTCAGAGCAGAGACTTAGAGGATTTTA AGCCCAGATCACAAATGGAAACTGACTTTCTCGAGCTCGCTATGGATGAATTCATACAGCGATATAAGCTCGAGGGCTAT GCCTTCGAACACATCGTTTATGGAGATTTCAGTCATGGACAACTTGGCGGTCTTCATTTAATGATAGGCTTAGCCAAGCG CTCACAAGATTCACCACTTAAATTAGAGGATTTTATCCCTATGGACAGCACAGTGAAAAATTACTTCATAACAGATGCGC AAACAGGTTCATCAAAATGTGTGTGTTCTGTGATTGATCTTTTACTTGATGACTTTGTCGAGATAATAAAGTCACAAGAT TTGTCAGTGATTTCAAAAGTGGTCAAGGTTACAATTGACTATGCTGAAATTTCATTCATGCTTTGGTGTAAGGATGGACA TGTTGAAACCTTCTACCCAAAACTACAAGCAAGTCAAGCGTGGCAACCAGGTGTTGCGATGCCTAACTTGTACAAGATGC AAAGAATGCTTCTTGAAAAGTGTGACCTTCAGAATTATGGTGAAAATGCTGTTATACCAAAAGGAATAATGATGAATGTC GCAAAGTATACTCAACTGTGTCAATACTTAAATACACTTACTTTAGCTGTACCCTACAACATGAGAGTTATTCACTTTGG TGCTGGCTCTGATAAAGGAGTTGCACCAGGTACAGCTGTGCTCAGACAATGGTTGCCAACTGGCACACTACTTGTCGATT CAGATCTTAATGACTTCGTCTCCGACGCAGATTCTACTTTAATTGGAGACTGTGCAACAGTACATACGGCTAATAAATGG GACCTTATTATTAGCGATATGTATGACCCTAGGACCAAACATGTGACAAAAGAGAATGACTCTAAAGAAGGGTTTTTCAC TTATCTGTGTGGATTTATAAAGCAAAAACTAGCCCTGGGTGGTTCTATAGCTGTAAAGATAACAGAGCATTCTTGGAATG CTGACCTTTACAAGCTTATGGGCCATTTCTCATGGTGGACAGCTTTTGTTACAAATGTAAATGCATCATCATCGGAAGCA TTTTTAATTGGGGCTAACTATCTTGGCAAGCCGAAGGAACAAATTGATGGCTATACCATGCATGCTAACTACATTTTCTG GAGGAACACAAATCCTATCCAGTTGTCTTCCTATTCACTCTTTGACATGAGCAAATTTCCTCTTAAATTAAGAGGAACTG CTGTAATGTCTCTTAAGGAGAATCAAATCAATGATATGATTTATTCTCTTCTGGAAAAAGGTAGGCTTATCATTAGAGAA AACAACAGAGTTGTGGTTTCAAGTGATATTCTTGTTAACAACTAAACGAACATGTTTATTTTCTTATTATTTCTTACTCT CACTAGTGGTAGTGACCTTGACCGGTGCACCACTTTTGATGATGTTCAAGCTCCTAATTACACTCAACATACTTCATCTA TGAGGGGGGTTTACTATCCTGATGAAATTTTTAGATCAGACACTCTTTATTTAACTCAGGATTTATTTCTTCCATTTTAT TCTAATGTTACAGGGTTTCATACTATTAATCATACGTTTGGCAACCCTGTCATACCTTTTAAGGATGGTATTTATTTTGC TGCCACAGAGAAATCAAATGTTGTCCGTGGTTGGGTTTTTGGTTCTACCATGAACAACAAGTCACAGTCGGTGATTATTA TTAACAATTCTACTAATGTTGTTATACGAGCATGTAACTTTGAATTGTGTGACAACCCTTTCTTTGCTGTTTCTAAACCC ATGGGTACACAGACACATACTATGATATTCGATAATGCATTTAATTGCACTTTCGAGTACATATCTGATGCCTTTTCGCT TGATGTTTCAGAAAAGTCAGGTAATTTTAAACACTTACGAGAGTTTGTGTTTAAAAATAAAGATGGGTTTCTCTATGTTT ATAAGGGCTATCAACCTATAGATGTAGTTCGTGATCTACCTTCTGGTTTTAACACTTTGAAACCTATTTTTAAGTTGCCT CTTGGTATTAACATTACAAATTTTAGAGCCATTCTTACAGCCTTTTCACCTGCTCAAGACATTTGGGGCACGTCAGCTGC AGCCTATTTTGTTGGCTATTTAAAGCCAACTACATTTATGCTCAAGTATGATGAAAATGGTACAATCACAGATGCTGTTG ATTGTTCTCAAAATCCACTTGCTGAACTCAAATGCTCTGTTAAGAGCTTTGAGATTGACAAAGGAATTTACCAGACCTCT AATTTCAGGGTTGTTCCCTCAGGAGATGTTGTGAGATTCCCTAATATTACAAACTTGTGTCCTTTTGGAGAGGTTTTTAA TGCTACTAAATTCCCTTCTGTCTATGCATGGGAGAGAAAAAAAATTTCTAATTGTGTTGCTGATTACTCTGTGCTCTACA ACTCAACATTTTTTTCAACCTTTAAGTGCTATGGCGTTTCTGCCACTAAGTTGAATGATCTTTGCTTCTCCAATGTCTAT GCAGATTCTTTTGTAGTCAAGGGAGATGATGTAAGACAAATAGCGCCAGGACAAACTGGTGTTATTGCTGATTATAATTA TAAATTGCCAGATGATTTCATGGGTTGTGTCCTTGCTTGGAATACTAGGAACATTGATGCTACTTCAACTGGTAATTATA ATTATAAATATAGGTATCTTAGACATGGCAAGCTTAGGCCCTTTGAGAGAGACATATCTAATGTGCCTTTCTCCCCTGAT GGCAAACCTTGCACCCCACCTGCTCTTAATTGTTATTGGCCATTAAATGATTATGGTTTTTACACCACTACTGGCATTGG CTACCAACCTTACAGAGTTGTAGTACTTTCTTTTGAACTTTTAAATGCACCGGCCACGGTTTGTGGACCAAAATTATCCA CTGACCTTATTAAGAACCAGTGTGTCAATTTTAATTTTAATGGACTCACTGGTACTGGTGTGTTAACTCCTTCTTCAAAG AGATTTCAACCATTTCAACAATTTGGCCGTGATGTTTCTGATTTCACTGATTCCGTTCGAGATCCTAAAACATCTGAAAT ATTAGACATTTCACCTTGCGCTTTTGGGGGTGTAAGTGTAATTACACCTGGAACAAATGCTTCATCTGAAGTTGCTGTTC TATATCAAGATGTTAACTGCACTGATGTTTCTACAGCAATTCATGCAGATCAACTCACACCAGCTTGGCGCATATATTCT ACTGGAAACAATGTATTCCAGACTCAAGCAGGCTGTCTTATAGGAGCTGAGCATGTCGACACTTCTTATGAGTGCGACAT TCCTATTGGAGCTGGCATTTGTGCTAGTTACCATACAGTTTCTTTATTACGTAGTACTAGCCAAAAATCTATTGTGGCTT ATACTATGTCTTTAGGTGCTGATAGTTCAATTGCTTACTCTAATAACACCATTGCTATACCTACTAACTTTTCAATTAGC ATTACTACAGAAGTAATGCCTGTTTCTATGGCTAAAACCTCCGTAGATTGTAATATGTACATCTGCGGAGATTCTACTGA ATGTGCTAATTTGCTTCTCCAATATGGTAGCTTTTGCACACAACTAAATCGTGCACTCTCAGGTATTGCTGCTGAACAGG ATCGCAACACACGTGAAGTGTTCGCTCAAGTCAAACAAATGTACAAAACCCCAACTTTGAAATATTTTGGTGGTTTTAAT TTTTCACAAATATTACCTGACCCTCTAAAGCCAACTAAGAGGTCTTTTATTGAGGACTTGCTCTTTAATAAGGTGACACT CGCTGATGCTGGCTTCATGAAGCAATATGGCGAATGCCTAGGTGATATTAATGCTAGAGATCTCATTTGTGCGCAGAAGT TCAATGGACTTACAGTGTTGCCACCTCTGCTCACTGATGATATGATTGCTGCCTACACTGCTGCTCTAGTTAGTGGTACT GCCACTGCTGGATGGACATTTGGTGCTGGCGCTGCTCTTCAAATACCTTTTGCTATGCAAATGGCATATAGGTTCAATGG CATTGGAGTTACCCAAAATGTTCTCTATGAGAACCAAAAACAAATCGCCAACCAATTTAACAAGGCGATTAGTCAAATTC AAGAATCACTTACAACAACATCAACTGCATTGGGCAAGCTGCAAGACGTTGTTAACCAGAATGCTCAAGCATTAAACACA CTTGTTAAACAACTTAGCTCTAATTTTGGTGCAATTTCAAGTGTGCTAAATGATATCCTTTCGCGACTTGATAAAGTCGA GGCGGAGGTACAAATTGACAGGTTAATTACAGGCAGACTTCAAAGCCTTCAAACCTATGTAACACAACAACTAATCAGGG CTGCTGAAATCAGGGCTTCTGCTAATCTTGCTGCTACTAAAATGTCTGAGTGTGTTCTTGGACAATCAAAAAGAGTTGAC TTTTGTGGAAAGGGCTACCACCTTATGTCCTTCCCACAAGCAGCCCCGCATGGTGTTGTCTTCCTACATGTCACGTATGT GCCATCCCAGGAGAGGAACTTCACCACAGCGCCAGCAATTTGTCATGAAGGCAAAGCATACTTCCCTCGTGAAGGTGTTT TTGTGTTTAATGGCACTTCTTGGTTTATTACACAGAGGAACTTCTTTTCTCCACAAATAATTACTACAGACAATACATTT GTCTCAGGAAATTGTGATGTCGTTATTGGCATCATTAACAACACAGTTTATGATCCTCTGCAACCTGAGCTTGACTCATT CAAAGAAGAGCTGGACAAGTACTTCAAAAATCATACATCACCAGATGTTGATCTTGGCGACATTTCAGGCATTAACGCTT CTGTCGTCAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTCGCTAAAAATTTAAATGAATCACTCATTGACCTTCAA GAATTGGGAAAATATGAGCAATATATTAAATGGCCTTGGTATGTTTGGCTCGGCTTCATTGCTGGACTAATTGCCATCGT CATGGTTACAATCTTGCTTTGTTGCATGACTAGTTGTTGCAGTTGCCTCAAGGGTGCATGCTCTTGTGGTTCTTGCTGCA AGTTTGATGAGGATGACTCTGAGCCAGTTCTCAAGGGTGTCAAATTACATTACACATAAACGAACTTATGGATTTGTTTA TGAGATTTTTTACTCTTAGATCAATTACTGCACAGCCAGTAAAAATTGACAATGCTTCTCCTGCAAGTACTGTTCATGCT ACAGCAACGATACCGCTACAAGCCTCACTCCCTTTCGGATGGCTTGTTATTGGCGTTGCATTTCTTGCTGTTTTTCAGAG CGCTACCAAAATAATTGCGCTCAATAAAAGATGGCAGCTAGCCCTTTATAAGGGCTTCCAGTTCATTTGCAATTTACTGC TGCTATTTGTTACCATCTATTCACATCTTTTGCTTGTCGCTGCAGGTATGGAGGCGCAATTTTTGTACCTCTATGCCTTG ATATATTTTCTACAATGCATCAACGCATGTAGAATTATTATGAGATGTTGGCTTTGTTGGAAGTGCAAATCCAAGAACCC ATTACTTTATGATGCCAACTACTTTGTTTGCTGGCACACACATAACTATGACTACTGTATACCATATAACAGTGTCACAG ATACAATTGTCGTTACTGAAGGTGACGGCATTTCAACACCAAAACTCAAAGAAGACTACCAAATTGGTGGTTATTCTGAG GATAGGCACTCAGGTGTTAAAGACTATGTCGTTGTACATGGCTATTTCACCGAAGTTTACTACCAGCTTGAGTCTACACA AATTACTACAGACACTGGTATTGAAAATGCTACATTCTTCATCTTTAACAAGCTTGTTAAAGACCCACCGAATGTGCAAA TACACACAATCGACGGCTCTTCAGGAGTTGCTAATCCAGCAATGGATCCAATTTATGATGAGCCGACGACGACTACTAGC GTGCCTTTGTAAGCACAAGAAAGTGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAAACAGGTACGTTAATAGTTAA TAGCGTACTTCTTTTTCTTGCTTTCGTGGTATTCTTGCTAGTCACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGT ACTGCTGCAATATTGTTAACGTGAGTTTAGTAAAACCAACGGTTTACGTCTACTCGCGTGTTAAAAATCTGAACTCTTCT GAAGGAGTTCCTGATCTTCTGGTCTAAACGAACTAACTATTATTATTATTCTGTTTGGAACTTTAACATTGCTTATCATG GCAGACAACGGTACTATTACCGTTGAGGAGCTTAAACAACTCCTGGAACAATGGAACCTAGTAATAGGTTTCCTATTCCT AGCCTGGATTATGTTACTACAATTTGCCTATTCTAATCGGAACAGGTTTTTGTACATAATAAAGCTTGTTTTCCTCTGGC TCTTGTGGCCAGTAACACTTGCTTGTTTTGTGCTTGCTGCTGTCTACAGAATTAATTGGGTGACTGGCGGGATTGCGATT GCAATGGCTTGTATTGTAGGCTTGATGTGGCTTAGCTACTTCGTTGCTTCCTTCAGGCTGTTTGCTCGTACCCGCTCAAT GTGGTCATTCAACCCAGAAACAAACATTCTTCTCAATGTGCCTCTCCGGGGGACAATTGTGACCAGACCGCTCATGGAAA GTGAACTTGTCATTGGTGCTGTGATCATTCGTGGTCACTTGCGAATGGCCGGACACTCCCTAGGGCGCTGTGACATTAAG GACCTGCCAAAAGAGATCACTGTGGCTACATCACGAACGCTTTCTTATTACAAATTAGGAGCGTCGCAGCGTGTAGGCAC TGATTCAGGTTTTGCTGCATACAACCGCTACCGTATTGGAAACTATAAATTAAATACAGACCACGCCGGTAGCAACGACA ATATTGCTTTGCTAGTACAGTAAGTGACAACAGATGTTTCATCTTGTTGACTTCCAGGTTACAATAGCAGAGATATTGAT TATCATTATGAGGACTTTCAGGATTGCTATTTGGAATCTTGACGTTATAATAAGTTCAATAGTGAGACAATTATTTAAGC CTCTAACTAAGAAGAATTATTCGGAGTTAGATGATGAAGAACCTATGGAGTTAGATTATCCATAAAACGAACATGAAAAT TATTCTCTTCCTGACATTGATTGTATTTACATCTTGCGAGCTATATCACTATCAGGAGTGTGTTAGAGGTACGACTGTAC TACTAAAAGAACCTTGCCCATCAGGAACATACGAGGGCAATTCACCATTTCACCCTCTTGCTGACAATAAATTTGCACTA ACTTGCACTAGCACACACTTTGCTTTTGCTTGTGCTGACGGTACTCGACATACCTATCAGCTGCGTGCAAGATCAGTTTC ACCAAAACTTTTCATCAGACAAGAGGAGGTTCAACAAGAGCTCTACTCGCCACTTTTTCTCATTGTTGCTGCTCTAGTAT TTTTAATACTTTGCTTCACCATTAAGAGAAAGACAGAATGAATGAGCTCACTTTAATTGACTTCTATTTGTGCTTTTTAG CCTTTCTGCTATTCCTTGTTTTAATAATGCTTATTATATTTTGGTTTTCACTCGAAATCCAGGATCTAGAAGAACCTTGT ACCAAAGTCTAAACGAACATGAAACTTCTCATTGTTTTGACTTGTATTTCTCTATGCAGTTGCATATGCACTGTAGTACA GCGCTGTGCATCTAATAAACCTCATGTGCTTGAAGATCCTTGTAAGGTACAACACTAGGGGTAATACTTATAGCACTGCT TGGCTTTGTGCTCTAGGAAAGGTTTTACCTTTTCATAGATGGCACACTATGGTTCAAACATGCACACCTAATGTTACTAT CAACTGTCAAGATCCAGCTGGTGGTGCGCTTATAGCTAGGTGTTGGTACCTTCATGAAGGTCACCAAACTGCTGCATTTA GAGACGTACTTGTTGTTTTAAATAAACGAACAAATTAAAATGTCTGATAATGGACCCCAATCAAACCAACGTAGTGCCCC CCGCATTACATTTGGTGGACCCACAGATTCAACTGACAATAACCAGAATGGAGGACGCAATGGGGCAAGGCCAAAACAGC GCCGACCCCAAGGTTTACCCAATAATACTGCGTCTTGGTTCACAGCTCTCACTCAGCATGGCAAGGAGGAACTTAGATTC CCTCGAGGCCAGGGCGTTCCAATCAACACCAATAGTGGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCCGACG AGTTCGTGGTGGTGACGGCAAAATGAAAGAGCTCAGCCCCAGATGGTACTTCTATTACCTAGGAACTGGCCCAGAAGCTT CACTTCCCTACGGCGCTAACAAAGAAGGCATCGTATGGGTTGCAACTGAGGGAGCCTTGAATACACCCAAAGACCACATT GGCACCCGCAATCCTAATAACAATGCTGCCACCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAAGGCTTCTACGC AGAGGGAAGCAGAGGCGGCAGTCAAGCCTCTTCTCGCTCCTCATCACGTAGTCGCGGTAATTCAAGAAATTCAACTCCTG GCAGCAGTAGGGGAAATTCTCCTGCTCGAATGGCTAGCGGAGGTGGTGAAACTGCCCTCGCGCTATTGCTGCTAGACAGA TTGAACCAGCTTGAGAGCAAAGTTTCTGGTAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGA GGCATCTAAAAAGCCTCGCCAAAAACGTACTGCCACAAAACAGTACAACGTCACTCAAGCATTTGGGAGACGTGGTCCAG AACAAACCCAAGGAAATTTCGGGGACCAAGACCTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACAA TTTGCTCCAAGTGCCTCTGCATTCTTTGGAATGTCACGCATTGGCATGGAAGTCACACCTTCGGGAACATGGCTGACTTA TCATGGAGCCATTAAATTGGATGACAAAGATCCACAATTCAAAGACAACGTCATACTGCTGAACAAGCACATTGACGCAT ACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAAAAGACTGATGAAGCTCAGCCTTTGCCGCAGAGACAA AAGAAGCAGCCCACTGTGACTCTTCTTCCTGCGGCTGACATGGATGATTTCTCCAGACAACTTCAAAATTCCATGAGTGG AGCTTCTGCTGATTCAACTCAGGCATAAACACTCATGATGACCACACAAGGCAGATGGGCTATGTAAACGTTTTCGCAAT TCCGTTTACGATACATAGTCTACTCTTGTGCAGAATGAATTCTCGTAACTAAACAGCACAAGTAGGTTTAGTTAACTTTA ATCTCACATAGCAATCTTTAATCAATGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCATCGAGGCCACGC GGAGTACGATCGAGGGTACAGTGAATAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAATTTT AGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAGGAGAATGACAAAAAAAAAAAAAAAAAAAAAAAA kraken2-2.1.2/data/names.dmp000066400000000000000000000040231404614725400155750ustar00rootroot000000000000001 | root | | scientific name | 10239 | Viruses | | scientific name | 10699 | Siphoviridae | | scientific name | 10710 | Escherichia virus Lambda | | scientific name | 11118 | Coronaviridae | | scientific name | 11308 | Orthomyxoviridae | | scientific name | 11320 | Influenza A virus | | scientific name | 11520 | Influenza B virus | | scientific name | 11632 | Retroviridae | | scientific name | 11646 | Lentivirus | | scientific name | 11676 | Human immunodeficiency virus 1 | | scientific name | 11709 | Human immunodeficiency virus 2 | | scientific name | 28883 | Caudovirales | | scientific name | 76804 | Nidovirales | | scientific name | 114727 | H1N1 subtype | | scientific name | 114729 | H2N2 subtype | | scientific name | 119210 | H3N2 subtype | | scientific name | 186765 | Lambdavirus | | scientific name | 197911 | Alphainfluenzavirus | | scientific name | 197912 | Betainfluenzavirus | | scientific name | 211044 | Influenza A virus (A/Puerto Rico/8/1934(H1N1)) | | scientific name | 327045 | Orthoretrovirinae | | scientific name | 335341 | Influenza A virus (A/New York/392/2004(H3N2)) | | scientific name | 488241 | Influenza A virus (A/Korea/426/1968(H2N2)) | | scientific name | 518987 | Influenza B virus (B/Lee/1940) | | scientific name | 694002 | Betacoronavirus | | scientific name | 694009 | Severe acute respiratory syndrome-related coronavirus | | scientific name | 1335626 | Middle East respiratory syndrome-related coronavirus | | scientific name | 2169561 | Ortervirales | | scientific name | 2497569 | Negarnaviricota | | scientific name | 2497571 | Polyploviricotina | | scientific name | 2497577 | Insthoviricetes | | scientific name | 2499399 | Cornidovirineae | | scientific name | 2499411 | Articulavirales | | scientific name | 2501931 | Orthocoronavirinae | | scientific name | 2509494 | Merbecovirus | | scientific name | 2509511 | Sarbecovirus | | scientific name | 2559587 | Riboviria | | scientific name | 2697049 | Severe acute respiratory syndrome coronavirus 2 | | scientific name | kraken2-2.1.2/data/nodes.dmp000066400000000000000000000050001404614725400155760ustar00rootroot000000000000001 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | 10239 | 1 | superkingdom | | 9 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | 10699 | 28883 | family | | 3 | 0 | 11 | 1 | 0 | 1 | 0 | 0 | | 10710 | 186765 | species | EV | 3 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | 11118 | 2499399 | family | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 11308 | 2499411 | family | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 11320 | 197911 | species | IA | 9 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | 11520 | 197912 | species | IB | 9 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | 11632 | 2169561 | family | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 11646 | 327045 | genus | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 11676 | 11646 | species | HI | 9 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | 11709 | 11646 | species | HI | 9 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | 28883 | 10239 | order | | 3 | 0 | 11 | 0 | 0 | 1 | 0 | 0 | | 76804 | 2559587 | order | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 114727 | 11320 | no rank | | 9 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | 114729 | 11320 | no rank | | 9 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | 119210 | 11320 | no rank | | 9 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | 186765 | 10699 | genus | | 3 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | 197911 | 11308 | genus | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 197912 | 11308 | genus | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 211044 | 114727 | no rank | | 9 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | 327045 | 11632 | subfamily | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 335341 | 119210 | no rank | | 9 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | 488241 | 114729 | no rank | | 9 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | 518987 | 11520 | no rank | | 9 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | 694002 | 2501931 | genus | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 694009 | 2509511 | species | SA | 9 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | 1335626 | 2509494 | species | ME | 9 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | 2169561 | 10239 | order | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 2497569 | 2559587 | phylum | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 2497571 | 2497569 | subphylum | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 2497577 | 2497571 | class | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 2499399 | 76804 | suborder | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 2499411 | 2497577 | order | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 2501931 | 11118 | subfamily | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 2509494 | 694002 | subgenus | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 2509511 | 694002 | subgenus | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 2559587 | 10239 | no rank | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | 2697049 | 694009 | no rank | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | kraken2-2.1.2/data/simulator.pl000077500000000000000000000066141404614725400163570ustar00rootroot00000000000000#!/usr/bin/env perl use strict; use warnings; use File::Basename; use Getopt::Long; my $PROG = basename $0; my $random_seed = 42; my $output_format = "reads#.fq"; my $num_frags = 100; my $frag_dist_params = "300,30"; my $read_length = 100; my $error_rate = 0.01; GetOptions( "random-seed:i" => \$random_seed, "output-format:s" => \$output_format, "num-frags:i" => \$num_frags, "frag-dist-params:s" => \$frag_dist_params, "read-length:i" => \$read_length, "error_rate:f" => \$error_rate, ) or die "$PROG: option parsing error\n"; my ($frag_dist_mu, $frag_dist_sd) = split /,/, $frag_dist_params; die "$PROG: illegal fragment length distribution $frag_dist_params\n" if $frag_dist_mu <= 0 || $frag_dist_sd * 6 > $frag_dist_mu; if ($random_seed >= 0) { srand($random_seed); } my $paired = $output_format =~ /#/; my ($fh1, $fh2); if ($paired) { my $f1 = $output_format; $f1 =~ s/#/_1/; open $fh1, ">", $f1 or die "$PROG: can't write $f1: $!\n"; my $f2 = $output_format; $f2 =~ s/#/_2/; open $fh2, ">", $f2 or die "$PROG: can't write $f2: $!\n"; } else { open $fh1, ">", $output_format or die "$PROG: can't write $output_format: $!\n"; } my %references; my ($seqid, $seq); while (<>) { if (/^>(\S+)/) { $references{$seqid} = $seq if defined $seqid; $seqid = $1; $seq = ""; } else { chomp; $seq .= $_; } } $references{$seqid} = $seq if defined $seqid; my $total_reference_length = 0; $total_reference_length += length($_) for values %references; for my $fragment_id (1 .. $num_frags) { my $ref_id = random_reference(); my $ref = $references{$ref_id}; my $fraglen; do { $fraglen = int(rand_normal() * $frag_dist_sd + $frag_dist_mu) } until $fraglen <= length($ref); my $startpos = int rand(length($ref) - $fraglen + 1); my $fragment = substr($ref, $startpos, $fraglen); my $seq1 = substr($fragment, 0, $read_length); my $seq2 = revcom(substr($fragment, -$read_length, $read_length)); my ($read1, $qual1) = errorize($seq1); my ($read2, $qual2) = errorize($seq2); print $fh1 "\@FRAG.${fragment_id}/1 $ref_id:@{[$startpos + 1]}\n$read1\n+\n$qual1\n"; print $fh2 "\@FRAG.${fragment_id}/2 $ref_id:@{[$startpos + 1]}\n$read2\n+\n$qual2\n" if $paired; } sub errorize { my $seq = shift; my $quals = "J" x length($seq); for my $i (0 .. length($seq) - 1) { if (rand() < $error_rate) { my @ch = grep { substr($seq, $i, 1) ne $_ } qw/A C G T/; my $edit = $ch[rand @ch]; substr($seq, $i, 1) = $edit; substr($quals, $i, 1) = '#'; } } while (length($seq) < $read_length) { $seq .= "N"; $quals .= "#"; } return ($seq, $quals); } sub random_reference { # Try to do random selection weighted by sequence length my $r = rand(); my $p = 0; while (my ($id, $seq) = each %references) { $p += length($seq) / $total_reference_length; if ($r < $p) { return $id; } } # Just return random ID as fallback my @k = keys %references; return $k[rand @k]; } { # Return random numbers with standard Gaussian distribution using Box-Muller my ($r, $th); sub rand_normal { my $z; if (defined $r) { $z = $r * sin($th); undef($r); } else { $r = sqrt(-2 * log(rand())); $th = 2 * 3.1415926 * rand(); $z = $r * cos($th); } return $z; } } sub revcom { local $_ = shift; tr/ABCDGHMNRSTUVWXYabcdghmnrstuvwxy/TVGHCDKNYSAABWXRtvghcdknysaabwxr/; return scalar reverse; } kraken2-2.1.2/docs/000077500000000000000000000000001404614725400140105ustar00rootroot00000000000000kraken2-2.1.2/docs/MANUAL.html000066400000000000000000001365321404614725400156650ustar00rootroot00000000000000 Kraken 2 Manual –

Kraken taxonomic sequence classification system

Version 2.1.2

Operating Manual

Table of Contents

Introduction

Kraken is a taxonomic sequence classifier that assigns taxonomic labels to DNA sequences. Kraken examines the k-mers within a query sequence and uses the information within those k-mers to query a database. That database maps k-mers to the lowest common ancestor (LCA) of all genomes known to contain a given k-mer.

The first version of Kraken used a large indexed and sorted list of k-mer/LCA pairs as its database. While fast, the large memory requirements posed some problems for users, and so Kraken 2 was created to provide a solution to those problems.

Kraken 2 differs from Kraken 1 in several important ways:

  1. Only minimizers of the k-mers in the query sequences are used as database queries. Similarly, only minimizers of the k-mers in the reference sequences in the database's genomic library are stored in the database. We will also refer to the minimizers as ℓ-mers, where ℓ ≤ k. All k-mers are considered to have the same LCA as their minimizer's database LCA value.
  2. Kraken 2 uses a compact hash table that is a probabilistic data structure. This means that occasionally, database queries will fail by either returning the wrong LCA, or by not resulting in a search failure when a queried minimizer was never actually stored in the database. By incurring the risk of these false positives in the data structure, Kraken 2 is able to achieve faster speeds and lower memory requirements. Users should be aware that database false positive errors occur in less than 1% of queries, and can be compensated for by use of confidence scoring thresholds.
  3. Kraken 2 has the ability to build a database from amino acid sequences and perform a translated search of the query sequences against that database.
  4. Kraken 2 utilizes spaced seeds in the storage and querying of minimizers to improve classification accuracy.
  5. Kraken 2 provides support for "special" databases that are not based on NCBI's taxonomy. These are currently limited to three popular 16S databases.

Because Kraken 2 only stores minimizers in its hash table, and k can be much larger than â„“, only a small percentage of the possible â„“-mers in a genomic library are actually deposited in the database. This creates a situation similar to the Kraken 1 "MiniKraken" databases; however, preliminary testing has shown the accuracy of a reduced Kraken 2 database to be quite similar to the full-sized Kraken 2 database, while Kraken 1's MiniKraken databases often resulted in a substantial loss of per-read sensitivity.

If you use Kraken 2 in your own work, please cite either the Kraken 2 paper and/or the original Kraken paper as appropriate. Thank you!

System Requirements

Installation

To begin using Kraken 2, you will first need to install it, and then either download or create a database.

Kraken 2 consists of two main scripts (kraken2 and kraken2-build), along with several programs and smaller scripts. As part of the installation process, all scripts and programs are installed in the same directory. After installation, you can move the main scripts elsewhere, but moving the other scripts and programs requires editing the scripts and changing the $KRAKEN2_DIR variables in the main scripts.

Once an install directory is selected, you need to run the following command in the directory where you extracted the Kraken 2 source:

./install_kraken2.sh $KRAKEN2_DIR

(Replace $KRAKEN2_DIR above with the directory where you want to install Kraken 2's programs/scripts.)

The install_kraken2.sh script should compile all of Kraken 2's code and setup your Kraken 2 program directory. Installation is successful if you see the message "Kraken 2 installation complete."

Once installation is complete, you may want to copy the main Kraken 2 scripts into a directory found in your PATH variable (e.g., "$HOME/bin"):

cp $KRAKEN2_DIR/kraken2{,-build,-inspect} $HOME/bin

After installation, you're ready to either create or download a database.

Kraken 2 Databases

A Kraken 2 database is a directory containing at least 3 files:

None of these three files are in a human-readable format. Other files may also be present as part of the database build process, and can, if desired, be removed after a successful build of the database.

In interacting with Kraken 2, you should not have to directly reference any of these files, but rather simply provide the name of the directory in which they are stored. Kraken 2 allows both the use of a standard database as well as custom databases; these are described in the sections Standard Kraken 2 Database and Custom Databases below, respectively.

Standard Kraken 2 Database

To create the standard Kraken 2 database, you can use the following command:

kraken2-build --standard --db $DBNAME

(Replace "$DBNAME" above with your preferred database name/location. Please note that the database will use approximately 100 GB of disk space during creation, with the majority of that being reference sequences or taxonomy mapping information that can be removed after the build.)

This will download NCBI taxonomic information, as well as the complete genomes in RefSeq for the bacterial, archaeal, and viral domains, along with the human genome and a collection of known vectors (UniVec_Core). After downloading all this data, the build process begins; this can be the most time-consuming step. If you have multiple processing cores, you can run this process with multiple threads, e.g.:

kraken2-build --standard --threads 24 --db $DBNAME

Using 32 threads on an AWS EC2 r4.8xlarge instance with 16 dual-core hyperthreaded 2.30 GHz CPUs and 244 GB of RAM, the build process took approximately 35 minutes in Jan. 2018.

The build process itself has two main steps, each of which requires passing over the contents of the reference library:

  1. Estimation of the capacity needed in the Kraken 2 compact hash table. This uses a low-memory method to reliably estimate the number of minimizers present in the reference library given the selected parameters k and â„“.
  2. Population of the hash table (and conversion of the taxonomy to an internal format). This step is a second pass over the reference library to find minimizers and then place them in the database.

(There is one other preliminary step where sequence IDs are mapped to taxonomy IDs, but this is usually a rather quick process and is mostly handled during library downloading.)

Unlike Kraken 1's build process, Kraken 2 does not perform checkpointing after the estimation step. This is because the estimation step is dependent on the selected k and â„“ values, and if the population step fails, it is likely because k needs to be increased (reducing the overall memory requirements).

Classification

To classify a set of sequences, use the kraken2 command:

kraken2 --db $DBNAME seqs.fa

Output will be sent to standard output by default. The files containing the sequences to be classified should be specified on the command line. Sequences can also be provided through standard input using the special filename /dev/fd/0.

The kraken2 program allows several different options:

To get a full list of options, use kraken2 --help.

Output Formats

Standard Kraken Output Format

Each sequence (or sequence pair, in the case of paired reads) classified by Kraken 2 results in a single line of output. Kraken 2's output lines contain five tab-delimited fields; from left to right, they are:

  1. "C"/"U": a one letter code indicating that the sequence was either classified or unclassified.
  2. The sequence ID, obtained from the FASTA/FASTQ header.
  3. The taxonomy ID Kraken 2 used to label the sequence; this is 0 if the sequence is unclassified.
  4. The length of the sequence in bp. In the case of paired read data, this will be a string containing the lengths of the two sequences in bp, separated by a pipe character, e.g. "98|94".
  5. A space-delimited list indicating the LCA mapping of each k-mer in the sequence(s). For example, "562:13 561:4 A:31 0:1 562:3" would indicate that:

Note that paired read data will contain a "|:|" token in this list to indicate the end of one read and the beginning of another.

When Kraken 2 is run against a protein database (see Translated Search), the LCA hitlist will contain the results of querying all six frames of each sequence. Reading frame data is separated by a "-:-" token.

Kraken 1 offered a kraken-translate and kraken-report script to change the output into different formats. Through the use of kraken2 --use-names, Kraken 2 will replace the taxonomy ID column with the scientific name and the taxonomy ID in parenthesis (e.g., "Bacteria (taxid 2)" instead of "2"), yielding similar functionality to Kraken 1's kraken-translate script. The sample report functionality now exists as part of the kraken2 script, with the use of the --report option; the sample report formats are described below.

Sample Report Output Format

Like Kraken 1, Kraken 2 offers two formats of sample-wide results. Kraken 2's standard sample report format is tab-delimited with one line per taxon. The fields of the output, from left-to-right, are as follows:

  1. Percentage of fragments covered by the clade rooted at this taxon
  2. Number of fragments covered by the clade rooted at this taxon
  3. Number of fragments assigned directly to this taxon
  4. A rank code, indicating (U)nclassified, (R)oot, (D)omain, (K)ingdom, (P)hylum, (C)lass, (O)rder, (F)amily, (G)enus, or (S)pecies. Taxa that are not at any of these 10 ranks have a rank code that is formed by using the rank code of the closest ancestor rank with a number indicating the distance from that rank. E.g., "G2" is a rank code indicating a taxon is between genus and species and the grandparent taxon is at the genus rank.
  5. NCBI taxonomic ID number
  6. Indented scientific name

The scientific names are indented using space, according to the tree structure specified by the taxonomy.

By default, taxa with no reads assigned to (or under) them will not have any output produced. However, if you wish to have all taxa displayed, you can use the --report-zero-counts switch to do so. This can be useful if you are looking to do further downstream analysis of the reports, and want to compare samples. Sorting by the taxonomy ID (using sort -k5,5n) can provide a consistent line ordering between reports.

In addition, we also provide the option --use-mpa-style that can be used in conjunction with --report. This option provides output in a format similar to MetaPhlAn's output. The output with this option provides one taxon per line, with a lowercase version of the rank codes in Kraken 2's standard sample report format (except for 'U' and 'R'), two underscores, and the scientific name of the taxon (e.g., "d__Viruses"). The full taxonomy of each taxon (at the eight ranks considered) is given, with each rank's name separated by a pipe character (e.g., "d__Viruses|o_Caudovirales"). Following this version of the taxon's scientific name is a tab and the number of fragments assigned to the clade rooted at that taxon.

Translated Search

Kraken 2 allows users to perform a six-frame translated search, similar to the well-known BLASTX program. To do this, Kraken 2 uses a reduced 15 amino acid alphabet and stores amino acid minimizers in its database. LCA results from all 6 frames are combined to yield a set of LCA hits, which is then resolved in the same manner as in Kraken's normal operation.

To build a protein database, the --protein option should be given to kraken2-build (either along with --standard, or with all steps if building a custom database).

Custom Databases

We realize the standard database may not suit everyone's needs. Kraken 2 also allows creation of customized databases.

To build a custom database:

  1. Install a taxonomy. Usually, you will just use the NCBI taxonomy, which you can easily download using:

    kraken2-build --download-taxonomy --db $DBNAME

    This will download the accession number to taxon maps, as well as the taxonomic name and tree information from NCBI. These files can be found in $DBNAME/taxonomy/ . If you need to modify the taxonomy, edits can be made to the names.dmp and nodes.dmp files in this directory; you may also need to modify the *.accession2taxid files appropriately.

    Some of the standard sets of genomic libraries have taxonomic information associated with them, and don't need the accession number to taxon maps to build the database successfully. These libraries include all those available through the --download-library option (see next point), except for the plasmid and non-redundant databases. If you are not using custom sequences (see the --add-to-library option) and are not using one of the plasmid or non-redundant database libraries, you may want to skip downloading of the accession number to taxon maps. This can be done by passing --skip-maps to the kraken2-build --download-taxonomy command.

  2. Install one or more reference libraries. Several sets of standard genomes/proteins are made easily available through kraken2-build:

    To download and install any one of these, use the --download-library switch, e.g.:

    kraken2-build --download-library bacteria --db $DBNAME

    Multiple libraries can be downloaded into a database prior to building by issuing multiple kraken2-build --download-library commands, e.g.:

    kraken2-build --download-library archaea --db $DBNAME
    kraken2-build --download-library viral --db $DBNAME

    The above commands would prepare a database that would contain archaeal and viral genomes; the --build option (see below) will still need to be used after downloading these libraries to actually build the database, however.

    (Note that downloading nr requires use of the --protein option, and that UniVec and UniVec_Core are incompatible with the --protein option.)

    Other genomes can also be added, but such genomes must meet certain requirements:

    Sequences not downloaded from NCBI may need their taxonomy information assigned explicitly. This can be done using the string kraken:taxid|XXX in the sequence ID, with XXX replaced by the desired taxon ID. For example, to put a known adapter sequence in taxon 32630 ("synthetic construct"), you could use the following:

    >sequence16|kraken:taxid|32630  Adapter sequence
    CAAGCAGAAGACGGCATACGAGATCTTCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA

    The kraken:taxid string must begin the sequence ID or be immediately preceded by a pipe character (|). Explicit assignment of taxonomy IDs in this manner will override the accession number mapping provided by NCBI.

    If your genomes meet the requirements above, then you can add each sequence to your database's genomic library using the --add-to-library switch, e.g.:

    kraken2-build --add-to-library chr1.fa --db $DBNAME
    kraken2-build --add-to-library chr2.fa --db $DBNAME

    Note that if you have a list of files to add, you can do something like this in bash:

    for file in chr*.fa
    do
        kraken2-build --add-to-library $file --db $DBNAME
    done

    Or even add all *.fa files found in the directory genomes:

    find genomes/ -name '*.fa' -print0 | xargs -0 -I{} -n1 kraken2-build --add-to-library {} --db $DBNAME

    (You may also find the -P option to xargs useful to add many files in parallel if you have multiple processors.)

  3. Once your library is finalized, you need to build the database. This can be done with the command:

    kraken2-build --build --db $DBNAME

The --threads option is also helpful here to reduce build time.

By default, the values of k and â„“ are 35 and 31, respectively (or 15 and 12 for protein databases). These values can be explicitly set with the --kmer-len and --minimizer-len options, however. Note that the minimizer length must be no more than 31 for nucleotide databases, and 15 for protein databases. Additionally, the minimizer length â„“ must be no more than the k-mer length. There is no upper bound on the value of k, but sequences less than k bp in length cannot be classified.

Kraken 2 also utilizes a simple spaced seed approach to increase accuracy. A number s < â„“/4 can be chosen, and s positions in the minimizer will be masked out during all comparisons. Masked positions are chosen to alternate from the second-to-last position in the minimizer; e.g., s = 5 and â„“ = 31 will result in masking out the 0 positions shown here:

   111 1111 1111 1111 1111 1101 0101 0101

By default, s = 7 for nucleotide databases, and s = 0 for protein databases. This can be changed using the --minimizer-spaces option along with the --build task of kraken2-build.

A full list of options for kraken2-build can be obtained using kraken2-build --help.

After building a database, if you want to reduce the disk usage of the database, you can use the --clean option for kraken2-build to remove intermediate files from the database directory.

Masking of Low-complexity Sequences

Low-complexity sequences, e.g. "ACACACACACACACACACACACACAC", are known to occur in many different organisms and are typically less informative for use in alignments; the BLAST programs often mask these sequences by default. Using this masking can help prevent false positives in Kraken 2's results, and so we have added this functionality as a default option to Kraken 2's library download/addition process.

Kraken 2 uses two programs to perform low-complexity sequence masking, both available from NCBI: dustmasker, for nucleotide sequences, and segmasker, for amino acid sequences. These programs are available as part of the NCBI BLAST+ suite. If these programs are not installed on the local system and in the user's PATH when trying to use kraken2-build, the database build will fail. Users who do not wish to install these programs can use the --no-masking option to kraken2-build in conjunction with any of the --download-library, --add-to-library, or --standard options; use of the --no-masking option will skip masking of low-complexity sequences during the build of the Kraken 2 database.

Special Databases

To support some common use cases, we provide the ability to build Kraken 2 databases using data from various external databases. These external databases may not follow the NCBI taxonomy, and so we've provided mechanisms to automatically create a taxonomy that will work with Kraken 2 (although such taxonomies may not be identical to NCBI's).

To build one of these "special" Kraken 2 databases, use the following command:

kraken2-build --db $DBNAME --special TYPE

where the TYPE string is one of the database names listed below.

At present, the "special" Kraken 2 database support we provide is limited to pre-packaged solutions for some public 16S sequence databases, but this may grow in the future.

16S Databases

For targeted 16S sequencing projects, a normal Kraken 2 database using whole genome data may use more resources than necessary. A Kraken 2 database created from a well-curated genomic library of just 16S data can provide both a more efficient solution as well as a more accurate set of predictions for such projects. We provide support for building Kraken 2 databases from three publicly available 16S databases:

Note that these databases may have licensing restrictions regarding their data, and it is your responsibility to ensure you are in compliance with those restrictions; please visit the databases' websites for further details. The kraken2-build script only uses publicly available URLs to download data and then converts that data into a form compatible for use with Kraken 2.

Furthermore, if you use one of these databases in your research, please visit the corresponding database's website to determine the appropriate and up-to-date citation.

Confidence Scoring

At present, we have not yet developed a confidence score with a probabilistic interpretation for Kraken 2. However, we have developed a simple scoring scheme that has yielded good results for us, and we've made that available in Kraken 2 through use of the --confidence option to kraken2. The approach we use allows a user to specify a threshold score in the [0,1] interval; the classifier then will adjust labels up the tree until the label's score (described below) meets or exceeds that threshold. If a label at the root of the taxonomic tree would not have a score exceeding the threshold, the sequence is called unclassified by Kraken 2 when this threshold is applied.

A sequence label's score is a fraction C/Q, where C is the number of k-mers mapped to LCA values in the clade rooted at the label, and Q is the number of k-mers in the sequence that lack an ambiguous nucleotide (i.e., they were queried against the database). Consider the example of the LCA mappings in Kraken 2's output given earlier:

"562:13 561:4 A:31 0:1 562:3" would indicate that:

In this case, ID #561 is the parent node of #562. Here, a label of #562 for this sequence would have a score of C/Q = (13+3)/(13+4+1+3) = 16/21. A label of #561 would have a score of C/Q = (13+4+3)/(13+4+1+3) = 20/21. If a user specified a --confidence threshold over 16/21, the classifier would adjust the original label from #562 to #561; if the threshold was greater than 20/21, the sequence would become unclassified.

Inspecting a Kraken 2 Database's Contents

The kraken2-inspect script allows users to gain information about the content of a Kraken 2 database. The output format of kraken2-inspect is identical to the reports generated with the --report option to kraken2. Instead of reporting how many reads in input data classified to a given taxon or clade, as kraken2's --report option would, the kraken2-inspect script will report the number of minimizers in the database that are mapped to the various taxa/clades. For example, the first five lines of kraken2-inspect's output on an example database might look like this:

$ kraken2-inspect --db EXAMPLE_DB | head -5
100.00% 1770368409      1581179 R       1       root
 96.50% 1708407622      58003   R1      131567    cellular organisms
 91.28% 1615910070      985309  D       2           Bacteria
 43.89% 777062062       1312736 P       1224          Proteobacteria
 18.62% 329590216       555667  C       1236            Gammaproteobacteria

This output indicates that 555667 of the minimizers in the database map directly to the Gammaproteobacteria class (taxid #1236), and 329590216 (18.62%) of the database's minimizers map to a taxon in the clade rooted at Gammaproteobacteria. For more information on kraken2-inspect's options, use its --help option.

Distinct minimizer count information

The KrakenUniq project extended Kraken 1 by, among other things, reporting an estimate of the number of distinct k-mers associated with each taxon in the input sequencing data. This allows users to better determine if Kraken's classifications are due to reads distributed throughout a reference genome, or due to only a small segment of a reference genome (and therefore likely false positive).

Thanks to the generosity of KrakenUniq's developer Florian Breitwieser in allowing parts of the KrakenUniq source code to be licensed under Kraken 2's MIT license, this distinct counting estimation is now available in Kraken 2. Development work by Martin Steinegger and Ben Langmead helped bring this functionality to Kraken 2.

At present, this functionality is an optional experimental feature -- meaning that we may later alter it in a way that is not backwards compatible with previous versions of the feature.

To use this functionality, simply run the kraken2 script with the additional --report-minimizer-data flag along with --report, e.g.:

kraken2 --db $DBNAME --report k2_report.txt --report-minimizer-data \
    --output k2_output.txt sequence_data.fq

This will put the standard Kraken 2 output (formatted as described in Standard Kraken Output Format) in k2_output.txt and the report information in k2_report.txt. Within the report file, two additional columns will be present, e.g.:

normal report format:

36.40   182 182 S2  211044                        Influenza A virus (A/Puerto Rico/8/1934(H1N1))

modified report format:

36.40   182 182 1688    18  S2  211044                        Influenza A virus (A/Puerto Rico/8/1934(H1N1))

In this modified report format, the two new columns are the fourth and fifth, respectively representing the number of minimizers found to be associated with a taxon in the read sequences (1688), and the estimate of the number of distinct minimizers associated with a taxon in the read sequence data (18). This would indicate that although 182 reads were classified as belonging to H1N1 influenza, only 18 distinct minimizers led to those 182 classifications.

The format with the --report-minimizer-data flag, then, is similar to that described in Sample Report Output Format, but slightly different. The fields in this new format, from left-to-right, are:

  1. Percentage of fragments covered by the clade rooted at this taxon
  2. Number of fragments covered by the clade rooted at this taxon
  3. Number of fragments assigned directly to this taxon
  4. Number of minimizers in read data associated with this taxon (new)
  5. An estimate of the number of distinct minimizers in read data associated with this taxon (new)
  6. A rank code, indicating (U)nclassified, (R)oot, (D)omain, (K)ingdom, (P)hylum, (C)lass, (O)rder, (F)amily, (G)enus, or (S)pecies. Taxa that are not at any of these 10 ranks have a rank code that is formed by using the rank code of the closest ancestor rank with a number indicating the distance from that rank. E.g., "G2" is a rank code indicating a taxon is between genus and species and the grandparent taxon is at the genus rank.
  7. NCBI taxonomic ID number
  8. Indented scientific name

We decided to make this an optional feature so as not to break existing software that processes Kraken 2's standard report format. However, this new format can be converted to the standard report format with the command:

cut -f1-3,6-8 k2_new_report.txt > k2_std_report.txt

As noted above, this is an experimental feature. We intend to continue development on this feature, and may change the new format and/or its information if we determine it to be necessary.

For background on the data structures used in this feature and their interaction with Kraken, please read the KrakenUniq paper, and please cite that paper if you use this functionality as part of your work.

Kraken 2 Environment Variables

The kraken2 and kraken2-inspect scripts supports the use of some environment variables to help in reducing command line lengths:

kraken2-2.1.2/docs/MANUAL.markdown000066400000000000000000001162651404614725400165440ustar00rootroot00000000000000Introduction ============ Kraken is a taxonomic sequence classifier that assigns taxonomic labels to DNA sequences. Kraken examines the $k$-mers within a query sequence and uses the information within those $k$-mers to query a database. That database maps $k$-mers to the lowest common ancestor (LCA) of all genomes known to contain a given $k$-mer. The first version of [Kraken] used a large indexed and sorted list of $k$-mer/LCA pairs as its database. While fast, the large memory requirements posed some problems for users, and so Kraken 2 was created to provide a solution to those problems. [Kraken 2] differs from Kraken 1 in several important ways: 1. Only minimizers of the $k$-mers in the query sequences are used as database queries. Similarly, only minimizers of the $k$-mers in the reference sequences in the database's genomic library are stored in the database. We will also refer to the minimizers as $\ell$-mers, where $\ell \leq k$. All $k$-mers are considered to have the same LCA as their minimizer's database LCA value. 2. Kraken 2 uses a compact hash table that is a probabilistic data structure. This means that occasionally, database queries will fail by either returning the wrong LCA, or by not resulting in a search failure when a queried minimizer was never actually stored in the database. By incurring the risk of these false positives in the data structure, Kraken 2 is able to achieve faster speeds and lower memory requirements. Users should be aware that database false positive errors occur in less than 1% of queries, and can be compensated for by use of confidence scoring thresholds. 3. Kraken 2 has the ability to build a database from amino acid sequences and perform a translated search of the query sequences against that database. 4. Kraken 2 utilizes spaced seeds in the storage and querying of minimizers to improve classification accuracy. 5. Kraken 2 provides support for "special" databases that are not based on NCBI's taxonomy. These are currently limited to three popular 16S databases. Because Kraken 2 only stores minimizers in its hash table, and $k$ can be much larger than $\ell$, only a small percentage of the possible $\ell$-mers in a genomic library are actually deposited in the database. This creates a situation similar to the Kraken 1 "MiniKraken" databases; however, preliminary testing has shown the accuracy of a reduced Kraken 2 database to be quite similar to the full-sized Kraken 2 database, while Kraken 1's MiniKraken databases often resulted in a substantial loss of per-read sensitivity. If you use Kraken 2 in your own work, please cite either the [Kraken 2 paper] and/or the original [Kraken paper] as appropriate. Thank you! [Kraken]: https://ccb.jhu.edu/software/kraken/ [Kraken 2]: https://ccb.jhu.edu/software/kraken2/ [Kraken paper]: https://genomebiology.biomedcentral.com/articles/10.1186/gb-2014-15-3-r46 [Kraken 2 paper]: https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1891-0 System Requirements =================== * **Disk space**: Construction of a Kraken 2 standard database requires approximately 100 GB of disk space. A test on 01 Jan 2018 of the default installation showed 42 GB of disk space was used to store the genomic library files, 26 GB was used to store the taxonomy information from NCBI, and 29 GB was used to store the Kraken 2 compact hash table. Like in Kraken 1, we strongly suggest against using NFS storage to store the Kraken 2 database if at all possible. * **Memory**: To run efficiently, Kraken 2 requires enough free memory to hold the database (primarily the hash table) in RAM. While this can be accomplished with a ramdisk, Kraken 2 will by default load the database into process-local RAM; the `--memory-mapping` switch to `kraken2` will avoid doing so. The default database size is 29 GB (as of Jan. 2018), and you will need slightly more than that in RAM if you want to build the default database. * **Dependencies**: Kraken 2 currently makes extensive use of Linux utilities such as sed, find, and wget. Many scripts are written using the Bash shell, and the main scripts are written using Perl. Core programs needed to build the database and run the classifier are written in C++11, and need to be compiled using a somewhat recent version of g++ that will support C++11. Multithreading is handled using OpenMP. Downloads of NCBI data are performed by wget and rsync. Most Linux systems will have all of the above listed programs and development libraries available either by default or via package download. Unlike Kraken 1, Kraken 2 does not use an external $k$-mer counter. However, by default, Kraken 2 will attempt to use the `dustmasker` or `segmasker` programs provided as part of NCBI's BLAST suite to mask low-complexity regions (see [Masking of Low-complexity Sequences]). **MacOS NOTE:** MacOS and other non-Linux operating systems are *not* explicitly supported by the developers, and MacOS users should refer to the Kraken-users group for support in installing the appropriate utilities to allow for full operation of Kraken 2. We will attempt to use MacOS-compliant code when possible, but development and testing time is at a premium and we cannot guarantee that Kraken 2 will install and work to its full potential on a default installation of MacOS. In particular, we note that the default MacOS X installation of GCC does not have support for OpenMP. Without OpenMP, Kraken 2 is limited to single-threaded operation, resulting in slower build and classification runtimes. * **Network connectivity**: Kraken 2's standard database build and download commands expect unfettered FTP and rsync access to the NCBI FTP server. If you're working behind a proxy, you may need to set certain environment variables (such as `ftp_proxy` or `RSYNC_PROXY`) in order to get these commands to work properly. Kraken 2's scripts default to using rsync for most downloads; however, you may find that your network situation prevents use of rsync. In such cases, you can try the `--use-ftp` option to `kraken2-build` to force the downloads to occur via FTP. * **MiniKraken**: At present, users with low-memory computing environments can replicate the "MiniKraken" functionality of Kraken 1 in two ways: first, by increasing the value of $k$ with respect to $\ell$ (using the `--kmer-len` and `--minimizer-len` options to `kraken2-build`); and secondly, through downsampling of minimizers (from both the database and query sequences) using a hash function. This second option is performed if the `--max-db-size` option to `kraken2-build` is used; however, the two options are not mutually exclusive. In a difference from Kraken 1, Kraken 2 does not require building a full database and then shrinking it to obtain a reduced database. Installation ============ To begin using Kraken 2, you will first need to install it, and then either download or create a database. Kraken 2 consists of two main scripts (`kraken2` and `kraken2-build`), along with several programs and smaller scripts. As part of the installation process, all scripts and programs are installed in the same directory. After installation, you can move the main scripts elsewhere, but moving the other scripts and programs requires editing the scripts and changing the `$KRAKEN2_DIR` variables in the main scripts. Once an install directory is selected, you need to run the following command in the directory where you extracted the Kraken 2 source: ./install_kraken2.sh $KRAKEN2_DIR (Replace `$KRAKEN2_DIR` above with the directory where you want to install Kraken 2's programs/scripts.) The `install_kraken2.sh` script should compile all of Kraken 2's code and setup your Kraken 2 program directory. Installation is successful if you see the message "`Kraken 2 installation complete.`" Once installation is complete, you may want to copy the main Kraken 2 scripts into a directory found in your `PATH` variable (e.g., "`$HOME/bin`"): cp $KRAKEN2_DIR/kraken2{,-build,-inspect} $HOME/bin After installation, you're ready to either create or download a database. Kraken 2 Databases ================== A Kraken 2 database is a directory containing at least 3 files: * `hash.k2d`: Contains the minimizer to taxon mappings * `opts.k2d`: Contains information about the options used to build the database * `taxo.k2d`: Contains taxonomy information used to build the database None of these three files are in a human-readable format. Other files may also be present as part of the database build process, and can, if desired, be removed after a successful build of the database. In interacting with Kraken 2, you should not have to directly reference any of these files, but rather simply provide the name of the directory in which they are stored. Kraken 2 allows both the use of a standard database as well as custom databases; these are described in the sections [Standard Kraken 2 Database] and [Custom Databases] below, respectively. Standard Kraken 2 Database ========================== To create the standard Kraken 2 database, you can use the following command: kraken2-build --standard --db $DBNAME (Replace "`$DBNAME`" above with your preferred database name/location. Please note that the database will use approximately 100 GB of disk space during creation, with the majority of that being reference sequences or taxonomy mapping information that can be removed after the build.) This will download NCBI taxonomic information, as well as the complete genomes in RefSeq for the bacterial, archaeal, and viral domains, along with the human genome and a collection of known vectors (UniVec_Core). After downloading all this data, the build process begins; this can be the most time-consuming step. If you have multiple processing cores, you can run this process with multiple threads, e.g.: kraken2-build --standard --threads 24 --db $DBNAME Using 32 threads on an AWS EC2 r4.8xlarge instance with 16 dual-core hyperthreaded 2.30 GHz CPUs and 244 GB of RAM, the build process took approximately 35 minutes in Jan. 2018. The build process itself has two main steps, each of which requires passing over the contents of the reference library: 1. **Estimation** of the capacity needed in the Kraken 2 compact hash table. This uses a low-memory method to reliably estimate the number of minimizers present in the reference library given the selected parameters $k$ and $\ell$. 2. **Population** of the hash table (and conversion of the taxonomy to an internal format). This step is a second pass over the reference library to find minimizers and then place them in the database. (There is one other preliminary step where sequence IDs are mapped to taxonomy IDs, but this is usually a rather quick process and is mostly handled during library downloading.) Unlike Kraken 1's build process, Kraken 2 does not perform checkpointing after the estimation step. This is because the estimation step is dependent on the selected $k$ and $\ell$ values, and if the population step fails, it is likely because $k$ needs to be increased (reducing the overall memory requirements). Classification ============== To classify a set of sequences, use the `kraken2` command: kraken2 --db $DBNAME seqs.fa Output will be sent to standard output by default. The files containing the sequences to be classified should be specified on the command line. Sequences can also be provided through standard input using the special filename `/dev/fd/0`. The `kraken2` program allows several different options: * **Multithreading**: Use the `--threads NUM` switch to use multiple threads. * **Quick operation**: Rather than searching all $\ell$-mers in a sequence, stop classification after the first database hit; use `--quick` to enable this mode. * **Hit group threshold**: The option `--minimum-hit-groups` will allow you to require multiple hit groups (a group of overlapping k-mers that share a common minimizer that is found in the hash table) be found before declaring a sequence classified, which can be especially useful with custom databases when testing to see if sequences either do or do not belong to a particular genome. * **Sequence filtering**: Classified or unclassified sequences can be sent to a file for later processing, using the `--classified-out` and `--unclassified-out` switches, respectively. * **Output redirection**: Output can be directed using standard shell redirection (`|` or `>`), or using the `--output` switch. * **Compressed input**: Kraken 2 can handle gzip and bzip2 compressed files as input by specifying the proper switch of `--gzip-compressed` or `--bzip2-compressed`. * **Input format auto-detection**: If regular files (i.e., not pipes or device files) are specified on the command line as input, Kraken 2 will attempt to determine the format of your input prior to classification. You can disable this by explicitly specifying `--gzip-compressed` or `--bzip2-compressed` as appropriate. Note that use of the character device file `/dev/fd/0` to read from standard input (aka `stdin`) will **not** allow auto-detection. * **Paired reads**: Kraken 2 provides an enhancement over Kraken 1 in its handling of paired read data. Rather than needing to concatenate the pairs together with an `N` character between the reads, Kraken 2 is able to process the mates individually while still recognizing the pairing information. Using the `--paired` option to `kraken2` will indicate to `kraken2` that the input files provided are paired read data, and data will be read from the pairs of files concurrently. Usage of `--paired` also affects the `--classified-out` and `--unclassified-out` options; users should provide a `#` character in the filenames provided to those options, which will be replaced by `kraken2` with "`_1`" and "`_2`" with mates spread across the two files appropriately. For example: kraken2 --paired --classified-out cseqs#.fq seqs_1.fq seqs_2.fq will put the first reads from classified pairs in `cseqs_1.fq`, and the second reads from those pairs in `cseqs_2.fq`. To get a full list of options, use `kraken2 --help`. Output Formats ============== Standard Kraken Output Format ----------------------------- Each sequence (or sequence pair, in the case of paired reads) classified by Kraken 2 results in a single line of output. Kraken 2's output lines contain five tab-delimited fields; from left to right, they are: 1. "C"/"U": a one letter code indicating that the sequence was either classified or unclassified. 2. The sequence ID, obtained from the FASTA/FASTQ header. 3. The taxonomy ID Kraken 2 used to label the sequence; this is 0 if the sequence is unclassified. 4. The length of the sequence in bp. In the case of paired read data, this will be a string containing the lengths of the two sequences in bp, separated by a pipe character, e.g. "98|94". 5. A space-delimited list indicating the LCA mapping of each $k$-mer in the sequence(s). For example, "562:13 561:4 A:31 0:1 562:3" would indicate that: - the first 13 $k$-mers mapped to taxonomy ID #562 - the next 4 $k$-mers mapped to taxonomy ID #561 - the next 31 $k$-mers contained an ambiguous nucleotide - the next $k$-mer was not in the database - the last 3 $k$-mers mapped to taxonomy ID #562 Note that paired read data will contain a "`|:|`" token in this list to indicate the end of one read and the beginning of another. When Kraken 2 is run against a protein database (see [Translated Search]), the LCA hitlist will contain the results of querying all six frames of each sequence. Reading frame data is separated by a "`-:-`" token. Kraken 1 offered a `kraken-translate` and `kraken-report` script to change the output into different formats. Through the use of `kraken2 --use-names`, Kraken 2 will replace the taxonomy ID column with the scientific name and the taxonomy ID in parenthesis (e.g., "Bacteria (taxid 2)" instead of "2"), yielding similar functionality to Kraken 1's `kraken-translate` script. The sample report functionality now exists as part of the `kraken2` script, with the use of the `--report` option; the sample report formats are described below. Sample Report Output Format --------------------------- Like Kraken 1, Kraken 2 offers two formats of sample-wide results. Kraken 2's standard sample report format is tab-delimited with one line per taxon. The fields of the output, from left-to-right, are as follows: 1. Percentage of fragments covered by the clade rooted at this taxon 2. Number of fragments covered by the clade rooted at this taxon 3. Number of fragments assigned directly to this taxon 4. A rank code, indicating (U)nclassified, (R)oot, (D)omain, (K)ingdom, (P)hylum, (C)lass, (O)rder, (F)amily, (G)enus, or (S)pecies. Taxa that are not at any of these 10 ranks have a rank code that is formed by using the rank code of the closest ancestor rank with a number indicating the distance from that rank. E.g., "G2" is a rank code indicating a taxon is between genus and species and the grandparent taxon is at the genus rank. 5. NCBI taxonomic ID number 6. Indented scientific name The scientific names are indented using space, according to the tree structure specified by the taxonomy. By default, taxa with no reads assigned to (or under) them will not have any output produced. However, if you wish to have all taxa displayed, you can use the `--report-zero-counts` switch to do so. This can be useful if you are looking to do further downstream analysis of the reports, and want to compare samples. Sorting by the taxonomy ID (using `sort -k5,5n`) can provide a consistent line ordering between reports. In addition, we also provide the option `--use-mpa-style` that can be used in conjunction with `--report`. This option provides output in a format similar to MetaPhlAn's output. The output with this option provides one taxon per line, with a lowercase version of the rank codes in Kraken 2's standard sample report format (except for 'U' and 'R'), two underscores, and the scientific name of the taxon (e.g., "d__Viruses"). The full taxonomy of each taxon (at the eight ranks considered) is given, with each rank's name separated by a pipe character (e.g., "d__Viruses|o_Caudovirales"). Following this version of the taxon's scientific name is a tab and the number of fragments assigned to the clade rooted at that taxon. Translated Search ================= Kraken 2 allows users to perform a six-frame translated search, similar to the well-known BLASTX program. To do this, Kraken 2 uses a reduced 15 amino acid alphabet and stores amino acid minimizers in its database. LCA results from all 6 frames are combined to yield a set of LCA hits, which is then resolved in the same manner as in Kraken's normal operation. To build a protein database, the `--protein` option should be given to `kraken2-build` (either along with `--standard`, or with all steps if building a custom database). Custom Databases ================ We realize the standard database may not suit everyone's needs. Kraken 2 also allows creation of customized databases. To build a custom database: 1. Install a taxonomy. Usually, you will just use the NCBI taxonomy, which you can easily download using: kraken2-build --download-taxonomy --db $DBNAME This will download the accession number to taxon maps, as well as the taxonomic name and tree information from NCBI. These files can be found in `$DBNAME/taxonomy/` . If you need to modify the taxonomy, edits can be made to the `names.dmp` and `nodes.dmp` files in this directory; you may also need to modify the `*.accession2taxid` files appropriately. Some of the standard sets of genomic libraries have taxonomic information associated with them, and don't need the accession number to taxon maps to build the database successfully. These libraries include all those available through the `--download-library` option (see next point), except for the `plasmid` and non-redundant databases. If you are not using custom sequences (see the `--add-to-library` option) and are not using one of the `plasmid` or non-redundant database libraries, you may want to skip downloading of the accession number to taxon maps. This can be done by passing `--skip-maps` to the `kraken2-build --download-taxonomy` command. 2. Install one or more reference libraries. Several sets of standard genomes/proteins are made easily available through `kraken2-build`: - `archaea`: RefSeq complete archaeal genomes/proteins - `bacteria`: RefSeq complete bacterial genomes/proteins - `plasmid`: RefSeq plasmid nucleotide/protein sequences - `viral`: RefSeq complete viral genomes/proteins - `human`: GRCh38 human genome/proteins - `fungi`: RefSeq complete fungal genomes/proteins - `plant`: RefSeq complete plant genomes/proteins - `protozoa`: RefSeq complete protozoan genomes/proteins - `nr`: NCBI non-redundant protein database - `nt`: NCBI non-redundant nucleotide database - `UniVec`: NCBI-supplied database of vector, adapter, linker, and primer sequences that may be contaminating sequencing projects and/or assemblies - `UniVec_Core`: A subset of UniVec chosen to minimize false positive hits to the vector database To download and install any one of these, use the `--download-library` switch, e.g.: kraken2-build --download-library bacteria --db $DBNAME Multiple libraries can be downloaded into a database prior to building by issuing multiple `kraken2-build --download-library` commands, e.g.: kraken2-build --download-library archaea --db $DBNAME kraken2-build --download-library viral --db $DBNAME The above commands would prepare a database that would contain archaeal and viral genomes; the `--build` option (see below) will still need to be used after downloading these libraries to actually build the database, however. (Note that downloading `nr` requires use of the `--protein` option, and that `UniVec` and `UniVec_Core` are incompatible with the `--protein` option.) Other genomes can also be added, but such genomes must meet certain requirements: - Sequences must be in a FASTA file (multi-FASTA is allowed) - Each sequence's ID (the string between the `>` and the first whitespace character on the header line) must contain either an NCBI accession number to allow Kraken 2 to lookup the correct taxa, or an explicit assignment of the taxonomy ID using `kraken:taxid` (see below). Sequences not downloaded from NCBI may need their taxonomy information assigned explicitly. This can be done using the string `kraken:taxid|XXX` in the sequence ID, with `XXX` replaced by the desired taxon ID. For example, to put a known adapter sequence in taxon 32630 ("synthetic construct"), you could use the following: >sequence16|kraken:taxid|32630 Adapter sequence CAAGCAGAAGACGGCATACGAGATCTTCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA The `kraken:taxid` string must begin the sequence ID or be immediately preceded by a pipe character (`|`). Explicit assignment of taxonomy IDs in this manner will override the accession number mapping provided by NCBI. If your genomes meet the requirements above, then you can add each sequence to your database's genomic library using the `--add-to-library` switch, e.g.: kraken2-build --add-to-library chr1.fa --db $DBNAME kraken2-build --add-to-library chr2.fa --db $DBNAME Note that if you have a list of files to add, you can do something like this in `bash`: for file in chr*.fa do kraken2-build --add-to-library $file --db $DBNAME done Or even add all `*.fa` files found in the directory `genomes`: `find genomes/ -name '*.fa' -print0 | xargs -0 -I{} -n1 kraken2-build --add-to-library {} --db $DBNAME` (You may also find the `-P` option to `xargs` useful to add many files in parallel if you have multiple processors.) 3. Once your library is finalized, you need to build the database. This can be done with the command: kraken2-build --build --db $DBNAME The `--threads` option is also helpful here to reduce build time. By default, the values of $k$ and $\ell$ are 35 and 31, respectively (or 15 and 12 for protein databases). These values can be explicitly set with the `--kmer-len` and `--minimizer-len` options, however. Note that the minimizer length must be no more than 31 for nucleotide databases, and 15 for protein databases. Additionally, the minimizer length $\ell$ must be no more than the $k$-mer length. There is no upper bound on the value of $k$, but sequences less than $k$ bp in length cannot be classified. Kraken 2 also utilizes a simple spaced seed approach to increase accuracy. A number $s$ < $\ell$/4 can be chosen, and $s$ positions in the minimizer will be masked out during all comparisons. Masked positions are chosen to alternate from the second-to-last position in the minimizer; e.g., $s$ = 5 and $\ell$ = 31 will result in masking out the 0 positions shown here: 111 1111 1111 1111 1111 1101 0101 0101 By default, $s$ = 7 for nucleotide databases, and $s$ = 0 for protein databases. This can be changed using the `--minimizer-spaces` option along with the `--build` task of `kraken2-build`. A full list of options for `kraken2-build` can be obtained using `kraken2-build --help`. After building a database, if you want to reduce the disk usage of the database, you can use the `--clean` option for `kraken2-build` to remove intermediate files from the database directory. Masking of Low-complexity Sequences =================================== Low-complexity sequences, e.g. "ACACACACACACACACACACACACAC", are known to occur in many different organisms and are typically less informative for use in alignments; the BLAST programs often mask these sequences by default. Using this masking can help prevent false positives in Kraken 2's results, and so we have added this functionality as a default option to Kraken 2's library download/addition process. Kraken 2 uses two programs to perform low-complexity sequence masking, both available from NCBI: `dustmasker`, for nucleotide sequences, and `segmasker`, for amino acid sequences. These programs are available as part of the NCBI BLAST+ suite. If these programs are not installed on the local system and in the user's PATH when trying to use `kraken2-build`, the database build will fail. Users who do not wish to install these programs can use the `--no-masking` option to `kraken2-build` in conjunction with any of the `--download-library`, `--add-to-library`, or `--standard` options; use of the `--no-masking` option will skip masking of low-complexity sequences during the build of the Kraken 2 database. Special Databases ================= To support some common use cases, we provide the ability to build Kraken 2 databases using data from various external databases. These external databases may not follow the NCBI taxonomy, and so we've provided mechanisms to automatically create a taxonomy that will work with Kraken 2 (although such taxonomies may not be identical to NCBI's). To build one of these "special" Kraken 2 databases, use the following command: kraken2-build --db $DBNAME --special TYPE where the `TYPE` string is one of the database names listed below. At present, the "special" Kraken 2 database support we provide is limited to pre-packaged solutions for some public 16S sequence databases, but this may grow in the future. 16S Databases ------------- For targeted 16S sequencing projects, a normal Kraken 2 database using whole genome data may use more resources than necessary. A Kraken 2 database created from a well-curated genomic library of just 16S data can provide both a more efficient solution as well as a more accurate set of predictions for such projects. We provide support for building Kraken 2 databases from three publicly available 16S databases: * [Greengenes] (Kraken 2 database name: `greengenes`), using all available 16S data. * [RDP] (Kraken 2 database name: `rdp`), using the bacterial and archaeal 16S data. * [SILVA] (Kraken 2 database name: `silva`), using the Small subunit NR99 sequence set. Note that these databases may have licensing restrictions regarding their data, and it is your responsibility to ensure you are in compliance with those restrictions; please visit the databases' websites for further details. The `kraken2-build` script only uses publicly available URLs to download data and then converts that data into a form compatible for use with Kraken 2. Furthermore, if you use one of these databases in your research, please visit the corresponding database's website to determine the appropriate and up-to-date citation. [Greengenes]: http://greengenes.lbl.gov/ [RDP]: http://rdp.cme.msu.edu/ [SILVA]: http://www.arb-silva.de/ Confidence Scoring ================== At present, we have not yet developed a confidence score with a probabilistic interpretation for Kraken 2. However, we have developed a simple scoring scheme that has yielded good results for us, and we've made that available in Kraken 2 through use of the `--confidence` option to `kraken2`. The approach we use allows a user to specify a threshold score in the [0,1] interval; the classifier then will adjust labels up the tree until the label's score (described below) meets or exceeds that threshold. If a label at the root of the taxonomic tree would not have a score exceeding the threshold, the sequence is called unclassified by Kraken 2 when this threshold is applied. A sequence label's score is a fraction $C$/$Q$, where $C$ is the number of $k$-mers mapped to LCA values in the clade rooted at the label, and $Q$ is the number of $k$-mers in the sequence that lack an ambiguous nucleotide (i.e., they were queried against the database). Consider the example of the LCA mappings in Kraken 2's output given earlier: "562:13 561:4 A:31 0:1 562:3" would indicate that: * the first 13 $k$-mers mapped to taxonomy ID #562 * the next 4 $k$-mers mapped to taxonomy ID #561 * the next 31 $k$-mers contained an ambiguous nucleotide * the next $k$-mer was not in the database * the last 3 $k$-mers mapped to taxonomy ID #562 In this case, ID #561 is the parent node of #562. Here, a label of #562 for this sequence would have a score of $C$/$Q$ = (13+3)/(13+4+1+3) = 16/21. A label of #561 would have a score of $C$/$Q$ = (13+4+3)/(13+4+1+3) = 20/21. If a user specified a `--confidence` threshold over 16/21, the classifier would adjust the original label from #562 to #561; if the threshold was greater than 20/21, the sequence would become unclassified. Inspecting a Kraken 2 Database's Contents ========================================= The `kraken2-inspect` script allows users to gain information about the content of a Kraken 2 database. The output format of `kraken2-inspect` is identical to the reports generated with the `--report` option to `kraken2`. Instead of reporting how many reads in input data classified to a given taxon or clade, as `kraken2`'s `--report` option would, the `kraken2-inspect` script will report the number of minimizers in the database that are mapped to the various taxa/clades. For example, the first five lines of `kraken2-inspect`'s output on an example database might look like this: $ kraken2-inspect --db EXAMPLE_DB | head -5 100.00% 1770368409 1581179 R 1 root 96.50% 1708407622 58003 R1 131567 cellular organisms 91.28% 1615910070 985309 D 2 Bacteria 43.89% 777062062 1312736 P 1224 Proteobacteria 18.62% 329590216 555667 C 1236 Gammaproteobacteria This output indicates that 555667 of the minimizers in the database map directly to the Gammaproteobacteria class (taxid #1236), and 329590216 (18.62%) of the database's minimizers map to a taxon in the clade rooted at Gammaproteobacteria. For more information on `kraken2-inspect`'s options, use its `--help` option. Distinct minimizer count information ==================================== The [KrakenUniq] project extended Kraken 1 by, among other things, reporting an estimate of the number of distinct k-mers associated with each taxon in the input sequencing data. This allows users to better determine if Kraken's classifications are due to reads distributed throughout a reference genome, or due to only a small segment of a reference genome (and therefore likely false positive). Thanks to the generosity of KrakenUniq's developer Florian Breitwieser in allowing parts of the KrakenUniq source code to be licensed under Kraken 2's MIT license, this distinct counting estimation is now available in Kraken 2. Development work by Martin Steinegger and Ben Langmead helped bring this functionality to Kraken 2. At present, this functionality is an optional *experimental feature* -- meaning that we may later alter it in a way that is not backwards compatible with previous versions of the feature. To use this functionality, simply run the `kraken2` script with the additional `--report-minimizer-data` flag along with `--report`, e.g.: kraken2 --db $DBNAME --report k2_report.txt --report-minimizer-data \ --output k2_output.txt sequence_data.fq This will put the standard Kraken 2 output (formatted as described in [Standard Kraken Output Format]) in `k2_output.txt` and the report information in `k2_report.txt`. Within the report file, two additional columns will be present, e.g.: **normal report format**: 36.40 182 182 S2 211044 Influenza A virus (A/Puerto Rico/8/1934(H1N1)) **modified report format**: 36.40 182 182 1688 18 S2 211044 Influenza A virus (A/Puerto Rico/8/1934(H1N1)) In this modified report format, the two new columns are the fourth and fifth, respectively representing the number of minimizers found to be associated with a taxon in the read sequences (1688), and the estimate of the number of distinct minimizers associated with a taxon in the read sequence data (18). This would indicate that although 182 reads were classified as belonging to H1N1 influenza, only 18 distinct minimizers led to those 182 classifications. The format with the `--report-minimizer-data` flag, then, is similar to that described in [Sample Report Output Format], but slightly different. The fields in this new format, from left-to-right, are: 1. Percentage of fragments covered by the clade rooted at this taxon 2. Number of fragments covered by the clade rooted at this taxon 3. Number of fragments assigned directly to this taxon 4. Number of minimizers in read data associated with this taxon (**new**) 5. An estimate of the number of distinct minimizers in read data associated with this taxon (**new**) 6. A rank code, indicating (U)nclassified, (R)oot, (D)omain, (K)ingdom, (P)hylum, (C)lass, (O)rder, (F)amily, (G)enus, or (S)pecies. Taxa that are not at any of these 10 ranks have a rank code that is formed by using the rank code of the closest ancestor rank with a number indicating the distance from that rank. E.g., "G2" is a rank code indicating a taxon is between genus and species and the grandparent taxon is at the genus rank. 7. NCBI taxonomic ID number 8. Indented scientific name We decided to make this an optional feature so as not to break existing software that processes Kraken 2's standard report format. However, this new format can be converted to the standard report format with the command: cut -f1-3,6-8 k2_new_report.txt > k2_std_report.txt As noted above, this is an *experimental feature*. We intend to continue development on this feature, and may change the new format and/or its information if we determine it to be necessary. For background on the data structures used in this feature and their interaction with Kraken, please read the [KrakenUniq paper], and please cite that paper if you use this functionality as part of your work. [KrakenUniq]: https://github.com/fbreitwieser/krakenuniq [KrakenUniq paper]: https://genomebiology.biomedcentral.com/articles/10.1186/s13059-018-1568-0 Kraken 2 Environment Variables ============================== The `kraken2` and `kraken2-inspect` scripts supports the use of some environment variables to help in reducing command line lengths: * **`KRAKEN2_NUM_THREADS`**: if the `--threads` option is not supplied to `kraken2`, then the value of this variable (if it is set) will be used as the number of threads to run `kraken2`. (This variable does not affect `kraken2-inspect`.) * **`KRAKEN2_DB_PATH`**: much like the `PATH` variable is used for executables by your shell, `KRAKEN2_DB_PATH` is a colon-separated list of directories that will be searched for the database you name if the named database does not have a slash (`/`) character. By default, Kraken 2 assumes the value of this variable is "`.`" (i.e., the current working directory). This variable can be used to create one (or more) central repositories of Kraken databases in a multi-user system. Example usage in bash: export KRAKEN2_DB_PATH="/home/user/my_kraken2_dbs:/data/kraken2_dbs:" This will cause three directories to be searched, in this order: 1) `/home/user/my_kraken2_dbs` 2) `/data/kraken2_dbs` 3) the current working directory (caused by the empty string as the third colon-separated field in the `KRAKEN2_DB_PATH` string) The search for a database will stop when a name match is found; if two directories in the `KRAKEN2_DB_PATH` have databases with the same name, the directory of the two that is searched first will have its database selected. If the above variable and value are used, and the databases `/data/kraken2_dbs/mainDB` and `./mainDB` are present, then kraken2 --db mainDB sequences.fa will classify `sequences.fa` using `/data/kraken_dbs/mainDB`; if instead you wanted to use the `mainDB` present in the current directory, you would need to specify a directory path to that database in order to circumvent searching, e.g.: kraken2 --db ./mainDB sequences.fa Note that the `KRAKEN2_DB_PATH` directory list can be skipped by the use of any absolute (beginning with `/`) or relative pathname (including at least one `/`) as the database name. * **`KRAKEN2_DEFAULT_DB`**: if no database is supplied with the `--db` option, the database named in this variable will be used instead. Using this variable, you can avoid using `--db` if you only have a single database that you usually use, e.g. in bash: export KRAKEN2_DEFAULT_DB="/home/user/kraken2db" kraken2 sequences.fa > kraken2.output This will classify `sequences.fa` using the `/home/user/kraken2db` database. Note that the value of `KRAKEN2_DEFAULT_DB` will also be interpreted in the context of the value of `KRAKEN2_DB_PATH` if you don't set `KRAKEN2_DEFAULT_DB` to an absolute or relative pathname. Given the earlier example in this section, the following: export KRAKEN2_DEFAULT_DB="mainDB" kraken2 sequences.fa will use `/data/kraken_dbs/mainDB` to classify `sequences.fa`. kraken2-2.1.2/docs/Makefile000066400000000000000000000004271404614725400154530ustar00rootroot00000000000000all: pandoc --title-prefix "Kraken 2 Manual" \ --include-in-header head.html \ --include-before-body top.html \ --from markdown --to html \ --table-of-contents \ --css kraken.css \ --output MANUAL.html \ < MANUAL.markdown kraken2-2.1.2/docs/bar-bg.png000066400000000000000000000002741404614725400156530ustar00rootroot00000000000000‰PNG  IHDRš«Ä pHYs  šœtIMEß # %z¤tEXtCommentCreated with GIMPW6IDATHÇcdØðá?à¿??#ŒÍ¸ñã||bÃQ‹G-µxÔâQ‹G-µxÔbúá % Ë"IEND®B`‚kraken2-2.1.2/docs/head.html000066400000000000000000000001571404614725400156020ustar00rootroot00000000000000 kraken2-2.1.2/docs/kraken.css000066400000000000000000000013271404614725400160000ustar00rootroot00000000000000body { background: #f0f0f0 url("bar-bg.png") top left repeat-y; color: #333; margin: 10px; margin-left: 50px; margin-bottom: 20px; font-family: 'Ubuntu', sans-serif; } a { color: #00b0f0; } a:visited { color: #0090f0; } a:hover { color: #00d0ff; } a:active { color: #00f0ff; } .pretoc { text-align: center; font-size: 1.2em; } .title { font-size: 2em; font-weight: bold; margin-bottom: 0; } .version { font-size: 0.9em; } h1 { color: #0090f0; border-bottom: 1px #0090f0 solid; margin-left: -10px; margin-bottom: 3px; } h1 a { color: #0090f0; text-decoration: none; } div#confidence-score-table table th { width: 7em; } pre { margin-left: 4em; } code { font-size: 1.2em; } kraken2-2.1.2/docs/top.html000066400000000000000000000003021404614725400154730ustar00rootroot00000000000000

Kraken taxonomic sequence classification system

Version 2.1.2

Operating Manual

Table of Contents

kraken2-2.1.2/install_kraken2.sh000077500000000000000000000023611404614725400165040ustar00rootroot00000000000000#!/bin/bash # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. set -e VERSION="2.1.2" if [ -z "$1" ] || [ -n "$2" ] then echo "Usage: $(basename $0) KRAKEN2_DIR" exit 64 fi if [ "$1" = "KRAKEN2_DIR" ] then echo "Please replace \"KRAKEN2_DIR\" with the name of the directory" echo "that you want to install Kraken 2 in." exit 1 fi # Perl cmd used to canonicalize dirname - "readlink -f" doesn't work # on OS X. export KRAKEN2_DIR=$(perl -MCwd=abs_path -le 'print abs_path(shift)' "$1") mkdir -p "$KRAKEN2_DIR" make -C src install for file in scripts/* do perl -pl -e 'BEGIN { while (@ARGV) { $_ = shift; ($k,$v) = split /=/, $_, 2; $H{$k} = $v } }'\ -e 's/#####=(\w+)=#####/$H{$1}/g' \ "KRAKEN2_DIR=$KRAKEN2_DIR" "VERSION=$VERSION" \ < "$file" > "$KRAKEN2_DIR/$(basename $file)" if [ -x "$file" ] then chmod +x "$KRAKEN2_DIR/$(basename $file)" fi done echo echo "Kraken 2 installation complete." echo echo "To make things easier for you, you may want to copy/symlink the following" echo "files into a directory in your PATH:" for file in $KRAKEN2_DIR/kraken2* do if [ -x "$file" ] then echo " $file" fi done kraken2-2.1.2/scripts/000077500000000000000000000000001404614725400145475ustar00rootroot00000000000000kraken2-2.1.2/scripts/16S_gg_installation.sh000077500000000000000000000016771404614725400207300ustar00rootroot00000000000000#!/bin/bash # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Build a 16S database from Greengenes data set -u # Protect against uninitialized vars. set -e # Stop on error set -o pipefail # Stop on failures in non-final pipeline commands FTP_SERVER="ftp://greengenes.microbio.me/" GG_VERSION="gg_13_5" REMOTE_DIR="$FTP_SERVER/greengenes_release/$GG_VERSION" mkdir -p "$KRAKEN2_DB_NAME" pushd "$KRAKEN2_DB_NAME" mkdir -p data taxonomy library pushd data wget "$REMOTE_DIR/${GG_VERSION}.fasta.gz" gunzip "${GG_VERSION}.fasta.gz" wget "$REMOTE_DIR/${GG_VERSION}_taxonomy.txt.gz" gunzip "${GG_VERSION}_taxonomy.txt.gz" build_gg_taxonomy.pl "${GG_VERSION}_taxonomy.txt" popd mv data/names.dmp data/nodes.dmp taxonomy/ mv data/seqid2taxid.map . mv "data/${GG_VERSION}.fasta" library/gg.fna popd kraken2-build --db $KRAKEN2_DB_NAME --build --threads $KRAKEN2_THREAD_CT kraken2-2.1.2/scripts/16S_rdp_installation.sh000077500000000000000000000016741404614725400211150ustar00rootroot00000000000000#!/bin/bash # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Build a 16S database from RDP data set -u # Protect against uninitialized vars. set -e # Stop on error set -o pipefail # Stop on failures in non-final pipeline commands HTTP_SERVER="http://rdp.cme.msu.edu/" REMOTE_DIR="$HTTP_SERVER/download/" mkdir -p "$KRAKEN2_DB_NAME" pushd "$KRAKEN2_DB_NAME" mkdir -p data taxonomy library pushd data wget "$REMOTE_DIR/current_Bacteria_unaligned.fa.gz" gunzip "current_Bacteria_unaligned.fa.gz" wget "$REMOTE_DIR/current_Archaea_unaligned.fa.gz" gunzip "current_Archaea_unaligned.fa.gz" build_rdp_taxonomy.pl current_*_unaligned.fa popd mv data/names.dmp data/nodes.dmp taxonomy/ mv data/seqid2taxid.map . for file in data/*.fa; do mv $file library/$(basename $file .fa).fna done popd kraken2-build --db $KRAKEN2_DB_NAME --build --threads $KRAKEN2_THREAD_CT kraken2-2.1.2/scripts/16S_silva_installation.sh000077500000000000000000000022511404614725400214360ustar00rootroot00000000000000#!/bin/bash # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Build a 16S database from Silva data set -u # Protect against uninitialized vars. set -e # Stop on error set -o pipefail # Stop on failures in non-final pipeline commands HTTPS_SERVER="https://ftp.arb-silva.de/" SILVA_VERSION="138_1" REMOTE_DIR="$HTTPS_SERVER/release_$SILVA_VERSION/Exports" FASTA_FILENAME="SILVA_${SILVA_VERSION/_/.}_SSURef_NR99_tax_silva.fasta" TAXO_PREFIX="tax_slv_ssu_${SILVA_VERSION/_/.}" mkdir -p "$KRAKEN2_DB_NAME" pushd "$KRAKEN2_DB_NAME" mkdir -p data taxonomy library pushd data wget "$REMOTE_DIR/${FASTA_FILENAME}.gz" gunzip "${FASTA_FILENAME}.gz" wget "$REMOTE_DIR/taxonomy/${TAXO_PREFIX}.acc_taxid.gz" gunzip "${TAXO_PREFIX}.acc_taxid.gz" wget "$REMOTE_DIR/taxonomy/${TAXO_PREFIX}.txt.gz" gunzip "${TAXO_PREFIX}.txt.gz" build_silva_taxonomy.pl "${TAXO_PREFIX}.txt" popd mv data/names.dmp data/nodes.dmp taxonomy/ mv data/${TAXO_PREFIX}.acc_taxid seqid2taxid.map sed -e '/^>/!y/U/T/' "data/$FASTA_FILENAME" > library/silva.fna popd kraken2-build --db $KRAKEN2_DB_NAME --build --threads $KRAKEN2_THREAD_CT kraken2-2.1.2/scripts/add_to_library.sh000077500000000000000000000016631404614725400200720ustar00rootroot00000000000000#!/bin/bash # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Copy specified file into a Kraken library set -u # Protect against uninitialized vars. set -e # Stop on error LIBRARY_DIR="$KRAKEN2_DB_NAME/library" if [ ! -e "$1" ] then 1>&2 echo "Can't add \"$1\": file does not exist" exit 1 fi if [ ! -f "$1" ] then 1>&2 echo "Can't add \"$1\": not a regular file" exit 1 fi add_dir="$LIBRARY_DIR/added" mkdir -p "$add_dir" prelim_map=$(cp_into_tempfile.pl -t "prelim_map_XXXXXXXXXX" -d "$add_dir" -s txt /dev/null) scan_fasta_file.pl "$1" > "$prelim_map" filename=$(cp_into_tempfile.pl -t "XXXXXXXXXX" -d "$add_dir" -s fna "$1") if [ -n "$KRAKEN2_MASK_LC" ]; then 1>&2 echo -n "Masking low-complexity regions of new file..." mask_low_complexity.sh $filename 1>&2 echo " done." fi 1>&2 echo "Added \"$1\" to library ($KRAKEN2_DB_NAME)" kraken2-2.1.2/scripts/build_gg_taxonomy.pl000077500000000000000000000046061404614725400206270ustar00rootroot00000000000000#!/usr/bin/env perl # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Parses Greengenes taxonomy file to create Kraken taxonomy # and sequence ID -> taxonomy ID mapping # Input (as <>): gg_13_5_taxonomy.txt use strict; use warnings; use File::Basename; my $PROG = basename $0; my %RANK_CODES = ( k => "superkingdom", p => "phylum", c => "class", o => "order", f => "family", g => "genus", s => "species" ); my %seqid_map; my %seen_it; my %child_data = ("root" => {}); LINE: while (<>) { chomp; my ($seqid, $taxo_str) = split /\t/; $taxo_str =~ s/(; [a-z]__)+$//; # Remove empty data $seqid_map{$seqid} = $taxo_str; next if $seen_it{$taxo_str}++; while ($taxo_str =~ s/(; [a-z]__[^;]+$)//) { my $level = $1; my $parent = $taxo_str; $child_data{$parent} ||= {}; $child_data{$parent}->{"$taxo_str$level"}++; next LINE if $seen_it{$taxo_str}++; } $child_data{"root"}->{$taxo_str}++; } # Assign IDs through BFS of tree, report names/nodes info in # NCBI format my %id_map; my $next_node_id = 1; open NAMES, ">", "names.dmp" or die "$PROG: can't write names.dmp: $!\n"; open NODES, ">", "nodes.dmp" or die "$PROG: can't write nodes.dmp: $!\n"; my @bfs_queue = (["root", 1]); while (@bfs_queue) { my $arg_ref = shift @bfs_queue; my ($node, $parent_id) = @$arg_ref; my $display_name = $node; my $rank; # special handling for species if ($node =~ /g__([^;]+); s__([^;]+)$/) { my ($genus, $species) = ($1, $2); $rank = "species"; if ($species =~ / endosymbiont /) { $display_name = $species; } else { $display_name = "$genus $species"; } } elsif ($node =~ /([a-z])__([^;]+)$/) { $rank = $RANK_CODES{$1}; $display_name = $2; } $rank ||= "no rank"; my $node_id = $next_node_id++; $id_map{$node} = $node_id; print NAMES "$node_id\t|\t$display_name\t|\t-\t|\tscientific name\t|\n"; print NODES "$node_id\t|\t$parent_id\t|\t$rank\t|\t-\t|\n"; my @children = sort keys %{ $child_data{$node} }; push @bfs_queue, [$_, $node_id] for @children; } close NAMES; close NODES; open SEQID_TAXID_MAP, ">", "seqid2taxid.map" or die "$PROG: can't write seqid2taxid.map: $!\n"; for my $seqid (sort { $a <=> $b } keys %seqid_map) { my $taxid = $id_map{ $seqid_map{$seqid} }; print SEQID_TAXID_MAP "$seqid\t$taxid\n"; } close SEQID_TAXID_MAP; kraken2-2.1.2/scripts/build_kraken2_db.sh000077500000000000000000000110561404614725400202720ustar00rootroot00000000000000#!/bin/bash # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Build a Kraken 2 database # Designed to be called by kraken2-build set -u # Protect against uninitialized vars. set -e # Stop on error set -o pipefail # Stop on failures in non-final pipeline commands function finalize_file() { mv $1.tmp $1 } function get_current_time() { date "+%s.%N" } function report_time_elapsed() { curr_time=$(get_current_time) perl -e '$time = $ARGV[1] - $ARGV[0];' \ -e '$sec = int($time); $nsec = $time - $sec;' \ -e '$min = int($sec/60); $sec %= 60;' \ -e '$hr = int($min/60); $min %= 60;' \ -e 'print "${hr}h" if $hr;' \ -e 'print "${min}m" if $min || $hr;' \ -e 'printf "%.3fs", $sec + $nsec;' \ $1 $curr_time } function list_sequence_files() { find library/ '(' -name '*.fna' -o -name '*.faa' ')' -print0 } start_time=$(get_current_time) DATABASE_DIR="$KRAKEN2_DB_NAME" if [ ! -d "$DATABASE_DIR" ] then echo "Can't find Kraken 2 DB directory \"$KRAKEN2_DB_NAME\"" exit 1 fi cd "$DATABASE_DIR" if [ ! -d "taxonomy/" ] then echo "Can't find taxonomy/ subdirectory in database directory, exiting." exit 1 fi if [ ! -d "library/" ] then echo "Can't find library/ subdirectory in database directory, exiting." exit 1 fi KRAKEN2XFLAG="" if [ -n "$KRAKEN2_PROTEIN_DB" ] then KRAKEN2XFLAG="-X" fi echo "Creating sequence ID to taxonomy ID map (step 1)..." if [ -d "library/added" ]; then find library/added/ -name 'prelim_map_*.txt' | xargs cat > library/added/prelim_map.txt fi seqid2taxid_map_file=seqid2taxid.map if [ -e "$seqid2taxid_map_file" ]; then echo "Sequence ID to taxonomy ID map already present, skipping map creation." else step_time=$(get_current_time) find library/ -maxdepth 2 -name prelim_map.txt | xargs cat > taxonomy/prelim_map.txt if [ ! -s "taxonomy/prelim_map.txt" ]; then echo "No preliminary seqid/taxid mapping files found, aborting." exit 1 fi grep "^TAXID" taxonomy/prelim_map.txt | cut -f 2- > $seqid2taxid_map_file.tmp || true if grep "^ACCNUM" taxonomy/prelim_map.txt | cut -f 2- > accmap_file.tmp; then if compgen -G "taxonomy/*.accession2taxid" > /dev/null; then lookup_accession_numbers accmap_file.tmp taxonomy/*.accession2taxid > seqid2taxid_acc.tmp cat seqid2taxid_acc.tmp >> $seqid2taxid_map_file.tmp rm seqid2taxid_acc.tmp else echo "Accession to taxid map files are required to build this DB." echo "Run 'kraken2-build --db $KRAKEN2_DB_NAME --download-taxonomy' again?" exit 1 fi fi rm -f accmap_file.tmp finalize_file $seqid2taxid_map_file echo "Sequence ID to taxonomy ID map complete. [$(report_time_elapsed $step_time)]" fi echo "Estimating required capacity (step 2)..." step_time=$(get_current_time) estimate=$(list_sequence_files | xargs -0 cat | estimate_capacity -k $KRAKEN2_KMER_LEN -l $KRAKEN2_MINIMIZER_LEN -S $KRAKEN2_SEED_TEMPLATE -p $KRAKEN2_THREAD_CT $KRAKEN2XFLAG ) # Slight upward adjustment of distinct minimizer estimate to protect # against crash w/ small reference sets estimate=$(( estimate + 8192 )) required_capacity=$(perl -le 'print int(shift() / shift())' $estimate $KRAKEN2_LOAD_FACTOR); echo "Estimated hash table requirement: $(( required_capacity * 4 )) bytes" max_db_flag="" if [ -n "$KRAKEN2_MAX_DB_SIZE" ] then if (( KRAKEN2_MAX_DB_SIZE < (required_capacity * 4) )) then max_db_flag="-M $(perl -le 'print int(shift() / 4)' $KRAKEN2_MAX_DB_SIZE)" echo "Specifying lower maximum hash table size of $KRAKEN2_MAX_DB_SIZE bytes" fi fi echo "Capacity estimation complete. [$(report_time_elapsed $step_time)]" echo "Building database files (step 3)..." fast_build_flag="" if [ -n "$KRAKEN2_FAST_BUILD" ] then fast_build_flag="-F" fi if [ -e "hash.k2d" ] then echo "Hash table already present, skipping database file build." else step_time=$(get_current_time) list_sequence_files | xargs -0 cat | \ build_db -k $KRAKEN2_KMER_LEN -l $KRAKEN2_MINIMIZER_LEN -S $KRAKEN2_SEED_TEMPLATE $KRAKEN2XFLAG \ -H hash.k2d.tmp -t taxo.k2d.tmp -o opts.k2d.tmp -n taxonomy/ -m $seqid2taxid_map_file \ -c $required_capacity -p $KRAKEN2_THREAD_CT $max_db_flag -B $KRAKEN2_BLOCK_SIZE -b $KRAKEN2_SUBBLOCK_SIZE \ -r $KRAKEN2_MIN_TAXID_BITS $fast_build_flag finalize_file taxo.k2d finalize_file opts.k2d finalize_file hash.k2d echo "Database files completed. [$(report_time_elapsed $step_time)]" fi echo "Database construction complete. [Total: $(report_time_elapsed $start_time)]" kraken2-2.1.2/scripts/build_rdp_taxonomy.pl000077500000000000000000000042171404614725400210150ustar00rootroot00000000000000#!/usr/bin/env perl # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Parses RDP sequence data to create Kraken taxonomy # and sequence ID -> taxonomy ID mapping # Input (as <>): current_{Archaea,Bacteria}_unaligned.fa use strict; use warnings; use File::Basename; my $PROG = basename $0; my %seqid_map; my %seen_it; my %child_data = ("root;no rank" => {}); LINE: while (<>) { next unless s/^>//; chomp; my ($seq_label, $taxo_str) = split /\t/; my ($seqid) = split " ", $seq_label; $taxo_str =~ s/^Lineage=Root;rootrank;/root;no rank;/; $taxo_str =~ s/;$/;no rank/; # adjust for unclassified things $seqid_map{$seqid} = $taxo_str; next if $seen_it{$taxo_str}++; while ($taxo_str =~ s/(;[^;]+;[^;]+)$//) { my $level = $1; my $parent = $taxo_str; $child_data{$parent} ||= {}; $child_data{$parent}->{"$taxo_str$level"}++; next LINE if $seen_it{$taxo_str}++; } } # Assign IDs through BFS of tree, report names/nodes info in # NCBI format my %id_map; my $next_node_id = 1; open NAMES, ">", "names.dmp" or die "$PROG: can't write names.dmp: $!\n"; open NODES, ">", "nodes.dmp" or die "$PROG: can't write nodes.dmp: $!\n"; my @bfs_queue = (["root;no rank", 1]); while (@bfs_queue) { my $arg_ref = shift @bfs_queue; my ($node, $parent_id) = @$arg_ref; if ($node !~ /([^;]+);([^;]+)$/) { die "$PROG: BFS processing encountered formatting error, \"$node\"\n"; } my ($display_name, $rank) = ($1, $2); $rank = "superkingdom" if $rank eq "domain"; # conform w/ NCBI taxonomy my $node_id = $next_node_id++; $id_map{$node} = $node_id; print NAMES "$node_id\t|\t$display_name\t|\t-\t|\tscientific name\t|\n"; print NODES "$node_id\t|\t$parent_id\t|\t$rank\t|\t-\t|\n"; my @children = sort keys %{ $child_data{$node} }; push @bfs_queue, [$_, $node_id] for @children; } close NAMES; close NODES; open SEQID_TAXID_MAP, ">", "seqid2taxid.map" or die "$PROG: can't write seqid2taxid.map: $!\n"; for my $seqid (sort keys %seqid_map) { my $taxid = $id_map{ $seqid_map{$seqid} }; print SEQID_TAXID_MAP "$seqid\t$taxid\n"; } close SEQID_TAXID_MAP; kraken2-2.1.2/scripts/build_silva_taxonomy.pl000077500000000000000000000023651404614725400213500ustar00rootroot00000000000000#!/usr/bin/env perl # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Parses Silva taxonomy file to create Kraken taxonomy # Input (as <>): tax_slv_ssu_nr_119.txt use strict; use warnings; use File::Basename; my $PROG = basename $0; my %id_map = ("root" => 1); open NAMES, ">", "names.dmp" or die "$PROG: can't write names.dmp: $!\n"; open NODES, ">", "nodes.dmp" or die "$PROG: can't write nodes.dmp: $!\n"; print NAMES "1\t|\troot\t|\t-\t|\tscientific name\t|\n"; print NODES "1\t|\t1\t|\tno rank\t|\t-\t|\n"; while (<>) { chomp; my ($taxo_str, $node_id, $rank) = split /\t/; $id_map{$taxo_str} = $node_id; if ($taxo_str =~ /^(.+;|)([^;]+);$/) { my $parent_name = $1; my $display_name = $2; if ($parent_name eq "") { $parent_name = "root"; } my $parent_id = $id_map{$parent_name}; if (! defined $parent_id) { die "$PROG: orphan error, line $.\n"; } $rank = "superkingdom" if $rank eq "domain"; print NAMES "$node_id\t|\t$display_name\t|\t-\t|\tscientific name\t|\n"; print NODES "$node_id\t|\t$parent_id\t|\t$rank\t|\t-\t|\n"; } else { die "$PROG: strange input, line $.\n"; } } close NAMES; close NODES; kraken2-2.1.2/scripts/clean_db.sh000077500000000000000000000011071404614725400166340ustar00rootroot00000000000000#!/bin/bash # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Removes intermediate files from a database directory, # such as reference library FASTA files and taxonomy data from NCBI. set -u # Protect against uninitialized vars. set -e # Stop on error cd $KRAKEN2_DB_NAME previous_usage=$(du -sh | cut -f1) 1>&2 echo "Database disk usage: $previous_usage" rm -rf library/ taxonomy/ seqid2taxid.map current_usage=$(du -sh | cut -f1) 1>&2 echo "After cleaning, database uses $current_usage" kraken2-2.1.2/scripts/cp_into_tempfile.pl000077500000000000000000000023751404614725400204360ustar00rootroot00000000000000#!/usr/bin/env perl # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Create a file in a specified directory, then copy an # existing file's contents into the new file. Write name of # new file to standard output. # # This exists because the mktemp program doesn't act consistently across # operating systems/distros/versions. use strict; use warnings; use File::Basename; use File::Temp 'tempfile'; use Getopt::Std; my $PROG = basename $0; getopts('d:t:s:', \my %opts) or usage(); $opts{$_} or usage() for qw/d t s/; # all switches mandatory my ($directory, $template, $suffix) = @opts{qw/d t s/}; die "$PROG: '$directory' not a directory!\n" unless -d $directory; die "$PROG: must specify a single filename\n" unless @ARGV == 1; $suffix =~ s/^\.//; my $old_filename = shift @ARGV; open FILE, "<", $old_filename or die "$PROG: can't read $old_filename: $!\n"; my ($fh, $new_filename) = tempfile($template, DIR => $directory, UNLINK => 0, SUFFIX => ".$suffix"); # copy loop while () { print {$fh} $_; } close FILE; close $fh; print "$new_filename\n"; sub usage { die "$PROG: <-d directory> <-t template> <-s suffix> \n"; } kraken2-2.1.2/scripts/download_genomic_library.sh000077500000000000000000000106211404614725400221420ustar00rootroot00000000000000#!/bin/bash # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Download specific genomic libraries for use with Kraken 2. # Supported libraries were chosen based on support from NCBI's FTP site # in easily obtaining a good collection of genomic data. Others may # be added upon popular demand. set -u # Protect against uninitialized vars. set -e # Stop on error LIBRARY_DIR="$KRAKEN2_DB_NAME/library" NCBI_SERVER="ftp.ncbi.nlm.nih.gov" FTP_SERVER="ftp://$NCBI_SERVER" RSYNC_SERVER="rsync://$NCBI_SERVER" THIS_DIR=$PWD library_name="$1" ftp_subdir=$library_name library_file="library.fna" if [ -n "$KRAKEN2_PROTEIN_DB" ]; then library_file="library.faa" fi function download_file() { file="$1" if [ -n "$KRAKEN2_USE_FTP" ] then wget -q ${FTP_SERVER}${file} else rsync --no-motd ${RSYNC_SERVER}${file} . fi } case $library_name in "archaea" | "bacteria" | "viral" | "fungi" | "plant" | "human" | "protozoa") mkdir -p $LIBRARY_DIR/$library_name cd $LIBRARY_DIR/$library_name rm -f assembly_summary.txt remote_dir_name=$library_name if [ "$library_name" = "human" ]; then remote_dir_name="vertebrate_mammalian/Homo_sapiens" fi if ! download_file "/genomes/refseq/$remote_dir_name/assembly_summary.txt"; then 1>&2 echo "Error downloading assembly summary file for $library_name, exiting." exit 1 fi if [ "$library_name" = "human" ]; then grep "Genome Reference Consortium" assembly_summary.txt > x mv x assembly_summary.txt fi rm -rf all/ library.f* manifest.txt rsync.err rsync_from_ncbi.pl assembly_summary.txt scan_fasta_file.pl $library_file >> prelim_map.txt ;; "plasmid") mkdir -p $LIBRARY_DIR/plasmid cd $LIBRARY_DIR/plasmid rm -f library.f* plasmid.* ## This is staying FTP only D/L for now 1>&2 echo -n "Downloading plasmid files from FTP..." wget -q --no-remove-listing --spider $FTP_SERVER/genomes/refseq/plasmid/ if [ -n "$KRAKEN2_PROTEIN_DB" ]; then awk '{ print $NF }' .listing | perl -ple 'tr/\r//d' | grep '\.faa\.gz' > manifest.txt else awk '{ print $NF }' .listing | perl -ple 'tr/\r//d' | grep '\.fna\.gz' > manifest.txt fi cat manifest.txt | xargs -n1 -I{} wget -q $FTP_SERVER/genomes/refseq/plasmid/{} cat manifest.txt | xargs -n1 -I{} gunzip -c {} > $library_file rm -f plasmid.* .listing scan_fasta_file.pl $library_file > prelim_map.txt 1>&2 echo " done." ;; "nr" | "nt") protein_lib=0 if [ "$library_name" = "nr" ]; then protein_lib=1 fi if (( protein_lib == 1 )) && [ -z "$KRAKEN2_PROTEIN_DB" ]; then 1>&2 echo "$library_name is a protein database, and the Kraken DB specified is nucleotide" exit 1 fi mkdir -p $LIBRARY_DIR/$library_name cd $LIBRARY_DIR/$library_name rm -f $library_name.gz 1>&2 echo -n "Downloading $library_name database from server... " download_file "/blast/db/FASTA/$library_name.gz" 1>&2 echo "done." 1>&2 echo -n "Uncompressing $library_name database..." gunzip $library_name.gz mv $library_name $library_file 1>&2 echo "done." 1>&2 echo -n "Parsing $library_name FASTA file..." # The nr/nt files tend to have non-standard sequence IDs, so # --lenient is used here. scan_fasta_file.pl --lenient $library_file >> prelim_map.txt 1>&2 echo "done." ;; "UniVec" | "UniVec_Core") if [ -n "$KRAKEN2_PROTEIN_DB" ]; then 1>&2 echo "$library_name is for nucleotide databases only" exit 1 fi mkdir -p $LIBRARY_DIR/$library_name cd $LIBRARY_DIR/$library_name 1>&2 echo -n "Downloading $library_name data from server... " download_file "/pub/UniVec/$library_name" 1>&2 echo "done." # 28384: "other sequences" special_taxid=28384 1>&2 echo -n "Adding taxonomy ID of $special_taxid to all sequences... " sed -e "s/^>/>kraken:taxid|$special_taxid|/" $library_name > library.fna scan_fasta_file.pl library.fna > prelim_map.txt 1>&2 echo "done." ;; *) 1>&2 echo "Unsupported library. Valid options are: " 1>&2 echo " archaea bacteria viral fungi plant protozoa human plasmid" 1>&2 echo " nr nt UniVec UniVec_Core" exit 1 ;; esac if [ -n "$KRAKEN2_MASK_LC" ]; then 1>&2 echo -n "Masking low-complexity regions of downloaded library..." mask_low_complexity.sh . 1>&2 echo " done." fi kraken2-2.1.2/scripts/download_taxonomy.sh000077500000000000000000000033401404614725400206530ustar00rootroot00000000000000#!/bin/bash # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Download NCBI taxonomy information for Kraken 2. # Designed to be called by kraken2-build set -u # Protect against uninitialized vars. set -e # Stop on error TAXONOMY_DIR="$KRAKEN2_DB_NAME/taxonomy" NCBI_SERVER="ftp.ncbi.nlm.nih.gov" RSYNC_SERVER="rsync://$NCBI_SERVER" FTP_SERVER="ftp://$NCBI_SERVER" mkdir -p "$TAXONOMY_DIR" cd "$TAXONOMY_DIR" function download_file() { file="$1" if [ -n "$KRAKEN2_USE_FTP" ] then wget -q ${FTP_SERVER}${file} else rsync --no-motd ${RSYNC_SERVER}${file} . fi } if [ ! -e "accmap.dlflag" ] && [ -z "$KRAKEN2_SKIP_MAPS" ] then if [ -z "$KRAKEN2_PROTEIN_DB" ] then for subsection in gb wgs do 1>&2 echo -n "Downloading nucleotide ${subsection} accession to taxon map..." download_file "/pub/taxonomy/accession2taxid/nucl_${subsection}.accession2taxid.gz" 1>&2 echo " done." done else 1>&2 echo -n "Downloading protein accession to taxon map..." download_file "/pub/taxonomy/accession2taxid/prot.accession2taxid.gz" 1>&2 echo " done." fi touch accmap.dlflag 1>&2 echo "Downloaded accession to taxon map(s)" fi if [ ! -e "taxdump.dlflag" ] then 1>&2 echo -n "Downloading taxonomy tree data..." download_file "/pub/taxonomy/taxdump.tar.gz" touch taxdump.dlflag 1>&2 echo " done." fi if ls | grep -q 'accession2taxid\.gz$' then 1>&2 echo -n "Uncompressing taxonomy data..." gunzip *accession2taxid.gz 1>&2 echo " done." fi if [ ! -e "taxdump.untarflag" ] then 1>&2 echo -n "Untarring taxonomy tree data..." tar zxf taxdump.tar.gz touch taxdump.untarflag 1>&2 echo " done." fi kraken2-2.1.2/scripts/kraken2000077500000000000000000000177411404614725400160440ustar00rootroot00000000000000#!/usr/bin/env perl # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Wrapper for Kraken's classifier use strict; use warnings; use Fcntl; use File::Basename; use Getopt::Long; my $PROG = basename $0; my $KRAKEN2_DIR = '#####=KRAKEN2_DIR=#####'; # Test to see if the executables got moved, try to recover if we can if (! -e "$KRAKEN2_DIR/classify") { use Cwd 'abs_path'; $KRAKEN2_DIR = dirname abs_path($0); } require "$KRAKEN2_DIR/kraken2lib.pm"; $ENV{"KRAKEN2_DIR"} = $KRAKEN2_DIR; $ENV{"PATH"} = "$KRAKEN2_DIR:$ENV{PATH}"; my $CLASSIFY = "$KRAKEN2_DIR/classify"; my $GZIP_MAGIC = chr(hex "1f") . chr(hex "8b"); my $BZIP2_MAGIC = "BZ"; my $quick = 0; my $min_hits = 1; my $db_prefix; my $threads; my $memory_mapping = 0; my $gunzip = 0; my $bunzip2 = 0; my $paired = 0; my $names_in_output = 0; my $only_classified_output = 0; my $unclassified_out; my $classified_out; my $outfile; my $confidence_threshold = 0.0; my $minimum_base_quality = 0; my $report_filename; my $use_mpa_style = 0; my $report_zero_counts = 0; my $minimum_hit_groups = 2; my $report_minimizer_data = 0; GetOptions( "help" => \&display_help, "version" => \&display_version, "db=s" => \$db_prefix, "threads=i" => \$threads, "quick" => \$quick, "unclassified-out=s" => \$unclassified_out, "classified-out=s" => \$classified_out, "output=s" => \$outfile, "confidence=f" => \$confidence_threshold, "memory-mapping" => \$memory_mapping, "paired" => \$paired, "use-names" => \$names_in_output, "gzip-compressed" => \$gunzip, "bzip2-compressed" => \$bunzip2, "only-classified-output" => \$only_classified_output, "minimum-base-quality=i" => \$minimum_base_quality, "report=s" => \$report_filename, "use-mpa-style" => \$use_mpa_style, "report-zero-counts" => \$report_zero_counts, "minimum-hit-groups=i" => \$minimum_hit_groups, "report-minimizer-data" => \$report_minimizer_data, ); if (! defined $threads) { $threads = $ENV{"KRAKEN2_NUM_THREADS"} || 1; } if (! @ARGV) { print STDERR "Need to specify input filenames!\n"; usage(); } eval { $db_prefix = kraken2lib::find_db($db_prefix); }; if ($@) { die "$PROG: $@"; } my $taxonomy = "$db_prefix/taxo.k2d"; my $kht_file = "$db_prefix/hash.k2d"; my $opt_file = "$db_prefix/opts.k2d"; for my $file ($taxonomy, $kht_file, $opt_file) { if (! -e $file) { die "$PROG: $file does not exist!\n"; } } if ($paired && ((@ARGV % 2) != 0 || @ARGV == 0)) { die "$PROG: --paired requires positive and even number filenames\n"; } my $compressed = $gunzip || $bunzip2; if ($gunzip && $bunzip2) { die "$PROG: can't use both gzip and bzip2 compression flags\n"; } if ($confidence_threshold < 0) { die "$PROG: confidence threshold must be nonnegative\n"; } if ($confidence_threshold > 1) { die "$PROG: confidence threshold must be no greater than 1\n"; } if ($minimum_hit_groups < 0) { die "$PROG: minimum number of hit groups must be nonnegative\n"; } my $auto_detect = ! $compressed; if ($auto_detect) { auto_detect_file_format(); } # set flags for classifier my @flags; push @flags, "-H", $kht_file; push @flags, "-t", $taxonomy; push @flags, "-o", $opt_file; push @flags, "-p", $threads; push @flags, "-q" if $quick; push @flags, "-P" if $paired; push @flags, "-n" if $names_in_output; push @flags, "-T", $confidence_threshold; push @flags, "-U", $unclassified_out if defined $unclassified_out; push @flags, "-C", $classified_out if defined $classified_out; push @flags, "-O", $outfile if defined $outfile; push @flags, "-Q", $minimum_base_quality; push @flags, "-R", $report_filename if defined $report_filename; push @flags, "-m" if $use_mpa_style; push @flags, "-z" if $report_zero_counts; push @flags, "-M" if $memory_mapping; push @flags, "-g", $minimum_hit_groups; push @flags, "-K" if $report_minimizer_data; # Stupid hack to keep filehandles from closing before exec # filehandles opened inside for loop below go out of scope # and are closed at end of loop without this my @persistent_fhs; # handle compressed files by opening pipes from decompression programs if ($compressed) { my @replacement_ARGV; my $compression_program; if ($gunzip) { $compression_program = "gzip"; } elsif ($bunzip2) { $compression_program = "bzip2"; } else { die "$PROG: unrecognized compression program! This is a Kraken bug.\n"; } for my $file (@ARGV) { my $qm_file = quotemeta $file; open my $fh, "$compression_program -dc $qm_file |" or die "$PROG: error opening pipe from $compression_program for $file: $!\n"; # Have to unset close-on-exec flags to make these pipes stay open across # exec call my $flags = fcntl $fh, F_GETFD, 0 or die "$PROG: fcntl GETFD error: $!\n"; fcntl $fh, F_SETFD, ($flags & ~FD_CLOEXEC) or die "$PROG: fcntl SETFD error: $!\n"; push @persistent_fhs, $fh; my $fd = fileno $fh; push @replacement_ARGV, "/dev/fd/$fd"; } @ARGV = @replacement_ARGV; } exec $CLASSIFY, @flags, @ARGV; die "$PROG: exec error: $!\n"; sub usage { my $exit_code = @_ ? shift : 64; my $default_db = "none"; eval { $default_db = '"' . kraken2lib::find_db() . '"'; }; my $def_thread_ct = exists $ENV{"KRAKEN2_NUM_THREADS"} ? (0 + $ENV{"KRAKEN2_NUM_THREADS"}) : 1; print STDERR < Options: --db NAME Name for Kraken 2 DB (default: $default_db) --threads NUM Number of threads (default: $def_thread_ct) --quick Quick operation (use first hit or hits) --unclassified-out FILENAME Print unclassified sequences to filename --classified-out FILENAME Print classified sequences to filename --output FILENAME Print output to filename (default: stdout); "-" will suppress normal output --confidence FLOAT Confidence score threshold (default: 0.0); must be in [0, 1]. --minimum-base-quality NUM Minimum base quality used in classification (def: 0, only effective with FASTQ input). --report FILENAME Print a report with aggregrate counts/clade to file --use-mpa-style With --report, format report output like Kraken 1's kraken-mpa-report --report-zero-counts With --report, report counts for ALL taxa, even if counts are zero --report-minimizer-data With --report, report minimizer and distinct minimizer count information in addition to normal Kraken report --memory-mapping Avoids loading database into RAM --paired The filenames provided have paired-end reads --use-names Print scientific names instead of just taxids --gzip-compressed Input files are compressed with gzip --bzip2-compressed Input files are compressed with bzip2 --minimum-hit-groups NUM Minimum number of hit groups (overlapping k-mers sharing the same minimizer) needed to make a call (default: $minimum_hit_groups) --help Print this message --version Print version information If none of the *-compressed flags are specified, and the filename provided is a regular file, automatic format detection is attempted. EOF exit $exit_code; } sub display_help { usage(0); } sub display_version { print "Kraken version #####=VERSION=#####\n"; print "Copyright 2013-2021, Derrick Wood (dwood\@cs.jhu.edu)\n"; exit 0; } sub auto_detect_file_format { my $magic; my $filename = $ARGV[0]; # Don't try to auto-detect when you can't unread data if (! -f $filename) { return; } # read 2-byte magic number to determine type of compression (if any) open FILE, "<", $filename; read FILE, $magic, 2; close FILE; if ($magic eq $GZIP_MAGIC) { $compressed = 1; $gunzip = 1; } elsif ($magic eq $BZIP2_MAGIC) { $compressed = 1; $bunzip2 = 1; } } kraken2-2.1.2/scripts/kraken2-build000077500000000000000000000253131404614725400171330ustar00rootroot00000000000000#!/usr/bin/env perl # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # General build process wrapper for Kraken 2. use strict; use warnings; use File::Basename; use Getopt::Long; use POSIX; my $PROG = basename $0; my $KRAKEN2_DIR = '#####=KRAKEN2_DIR=#####'; # Test to see if the executables got moved, try to recover if we can if (! -e "$KRAKEN2_DIR/classify") { use Cwd 'abs_path'; $KRAKEN2_DIR = dirname abs_path($0); } $ENV{"KRAKEN2_DIR"} = $KRAKEN2_DIR; $ENV{"PATH"} = "$KRAKEN2_DIR:$ENV{PATH}"; my $DEF_AA_MINIMIZER_LEN = 12; my $DEF_AA_KMER_LEN = 15; my $DEF_AA_MINIMIZER_SPACES = 0; my $DEF_NT_MINIMIZER_LEN = 31; my $DEF_NT_KMER_LEN = 35; my $DEF_NT_MINIMIZER_SPACES = 7; my $DEF_THREAD_CT = 1; my $DEF_LOAD_FACTOR = 0.7; my $DEF_BLOCK_SIZE = 16384; my $DEF_SUBBLOCK_SIZE = 0; my @VALID_LIBRARY_TYPES = qw/archaea bacteria plasmid viral plant protozoa fungi human nr nt UniVec UniVec_Core/; my @VALID_SPECIAL_DB_TYPES = qw/greengenes silva rdp/; # Option/task option variables my ( $db, $threads, $minimizer_len, $kmer_len, $minimizer_spaces, $is_protein, $masking, $max_db_size, $use_ftp, $skip_maps, $load_factor, $fast_build, $block_size, $subblock_size, $minimum_bits_for_taxid, $dl_taxonomy, $dl_library, $add_to_library, $build, $standard, $clean, $special, ); # Initialization of defaults and env. var checks $db = $ENV{"KRAKEN2_DB_NAME"}; $threads = $ENV{"KRAKEN2_THREAD_CT"} || $DEF_THREAD_CT; $load_factor = $ENV{"KRAKEN2_LOAD_FACTOR"} || $DEF_LOAD_FACTOR; $kmer_len = $ENV{"KRAKEN2_KMER_LEN"}; $minimizer_len = $ENV{"KRAKEN2_MINIMIZER_LEN"}; $minimizer_spaces = $ENV{"KRAKEN2_MINIMIZER_SPACES"}; $is_protein = $ENV{"KRAKEN2_PROTEIN_DB"} || 0; $use_ftp = $ENV{"KRAKEN2_USE_FTP"} || 0; $skip_maps = $ENV{"KRAKEN2_SKIP_MAPS"} || 0; $masking = exists $ENV{"KRAKEN2_MASK_LC"} ? $ENV{"KRAKEN2_MASK_LC"} : 1; $fast_build = $ENV{"KRAKEN2_FAST_BUILD"} || 0; $block_size = $ENV{"KRAKEN2_BLOCK_SIZE"} || $DEF_BLOCK_SIZE; $subblock_size = $ENV{"KRAKEN2_SUBBLOCK_SIZE"} || $DEF_SUBBLOCK_SIZE; $minimum_bits_for_taxid = $ENV{"KRAKEN2_MIN_TAXID_BITS"} || 0; # variables corresponding to task options my @TASK_LIST = ( \$dl_taxonomy, \$dl_library, \$add_to_library, \$build, \$standard, \$clean, \$special, ); GetOptions( "help" => \&display_help, "version" => \&display_version, "db=s" => \$db, "threads=i" => \$threads, "minimizer-len=i" => \$minimizer_len, "kmer-len=i" => \$kmer_len, "minimizer-spaces=i", \$minimizer_spaces, "protein" => \$is_protein, "masking!" => \$masking, "max-db-size=i" => \$max_db_size, "use-ftp" => \$use_ftp, "skip-maps" => \$skip_maps, "load-factor=f" => \$load_factor, "fast-build" => \$fast_build, "block-size=i" => \$block_size, "subblock-size=i" => \$subblock_size, "minimum-bits-for-taxid=i" => \$minimum_bits_for_taxid, "download-taxonomy" => \$dl_taxonomy, "download-library=s" => \$dl_library, "add-to-library=s" => \$add_to_library, "build" => \$build, "standard" => \$standard, "clean" => \$clean, "special=s" => \$special, ) or usage(); if ($is_protein) { $kmer_len = $DEF_AA_KMER_LEN if ! defined $kmer_len; $minimizer_len = $DEF_AA_MINIMIZER_LEN if ! defined $minimizer_len; $minimizer_spaces = $DEF_AA_MINIMIZER_SPACES if ! defined $minimizer_spaces; } else { $kmer_len = $DEF_NT_KMER_LEN if ! defined $kmer_len; $minimizer_len = $DEF_NT_MINIMIZER_LEN if ! defined $minimizer_len; $minimizer_spaces = $DEF_NT_MINIMIZER_SPACES if ! defined $minimizer_spaces; } if ($subblock_size == 0) { $subblock_size = ceil($block_size / $threads); } if (@ARGV) { warn "Extra arguments on command line.\n"; usage(); } my $task_options = scalar grep defined $$_, @TASK_LIST; if ($task_options > 1) { warn "More than one task option selected.\n"; usage(); } if ($task_options == 0) { warn "Must select a task option.\n"; usage(); } if (! defined $db) { die "Must specify a database name\n"; } if ($threads <= 0) { die "Can't use nonpositive thread count of $threads\n"; } if ($minimizer_len > $kmer_len) { die "Minimizer length ($minimizer_len) must not be greater than k ($kmer_len)\n"; } if ($minimizer_len <= 0) { die "Can't use nonpositive minimizer length of $minimizer_len\n"; } if ($minimizer_len > 31) { die "Can't use minimizer len of $minimizer_len (must be <= 31)\n"; } if ($load_factor <= 0) { die "Can't have nonnegative load factor of $load_factor\n"; } if ($load_factor > 1) { die "Can't have load factor of $load_factor (must be no more than 1.0).\n"; } $ENV{"KRAKEN2_DB_NAME"} = $db; $ENV{"KRAKEN2_THREAD_CT"} = $threads; $ENV{"KRAKEN2_MINIMIZER_LEN"} = $minimizer_len; $ENV{"KRAKEN2_KMER_LEN"} = $kmer_len; $ENV{"KRAKEN2_MINIMIZER_SPACES"} = $minimizer_spaces; $ENV{"KRAKEN2_SEED_TEMPLATE"} = construct_seed_template(); $ENV{"KRAKEN2_PROTEIN_DB"} = $is_protein ? 1 : ""; $ENV{"KRAKEN2_MASK_LC"} = $masking ? 1 : ""; $ENV{"KRAKEN2_MAX_DB_SIZE"} = defined($max_db_size) ? $max_db_size : ""; $ENV{"KRAKEN2_USE_FTP"} = $use_ftp ? 1 : ""; $ENV{"KRAKEN2_SKIP_MAPS"} = $skip_maps ? 1 : ""; $ENV{"KRAKEN2_LOAD_FACTOR"} = $load_factor; $ENV{"KRAKEN2_FAST_BUILD"} = $fast_build ? 1 : ""; $ENV{"KRAKEN2_BLOCK_SIZE"} = $block_size; $ENV{"KRAKEN2_SUBBLOCK_SIZE"} = $subblock_size; $ENV{"KRAKEN2_MIN_TAXID_BITS"} = $minimum_bits_for_taxid; if ($dl_taxonomy) { download_taxonomy(); } elsif (defined($dl_library)) { download_library($dl_library); } elsif (defined($add_to_library)) { add_to_library($add_to_library); } elsif ($standard) { standard_installation(); } elsif ($build) { build_database(); } elsif ($clean) { clean_database(); } elsif ($special) { build_special_database($special); } else { usage(); } exit -1; # END OF MAIN CODE. sub usage { my $exit_code = @_ ? shift : 64; print STDERR < # # This file is part of the Kraken 2 taxonomic sequence classification system. # Wrapper for the dump_table program to report minimizer counts per taxa use strict; use warnings; use Fcntl; use File::Basename; use Getopt::Long; my $PROG = basename $0; my $KRAKEN2_DIR = '#####=KRAKEN2_DIR=#####'; # Test to see if the executables got moved, try to recover if we can if (! -e "$KRAKEN2_DIR/classify") { use Cwd 'abs_path'; $KRAKEN2_DIR = dirname abs_path($0); } require "$KRAKEN2_DIR/kraken2lib.pm"; $ENV{"KRAKEN2_DIR"} = $KRAKEN2_DIR; $ENV{"PATH"} = "$KRAKEN2_DIR:$ENV{PATH}"; my $DUMP_TABLE = "$KRAKEN2_DIR/dump_table"; my $db_prefix; my $names_in_output = 0; my $use_mpa_style = 0; my $report_zero_counts = 0; my $skip_counts = 0; my $threads; GetOptions( "help" => \&display_help, "version" => \&display_version, "db=s" => \$db_prefix, "skip-counts" => \$skip_counts, "use-mpa-style" => \$use_mpa_style, "report-zero-counts" => \$report_zero_counts, "threads=i" => \$threads, ); if (@ARGV) { usage(); } eval { $db_prefix = kraken2lib::find_db($db_prefix); }; if ($@) { die "$PROG: $@"; } if (! defined $threads) { $threads = $ENV{"KRAKEN2_NUM_THREADS"} || 1; } my $taxonomy = "$db_prefix/taxo.k2d"; my $kht_file = "$db_prefix/hash.k2d"; my $opt_file = "$db_prefix/opts.k2d"; for my $file ($taxonomy, $kht_file, $opt_file) { if (! -e $file) { die "$PROG: $file does not exist!\n"; } } # set flags for dump_table my @flags; push @flags, "-H", $kht_file; push @flags, "-t", $taxonomy; push @flags, "-o", $opt_file; push @flags, "-s" if $skip_counts; push @flags, "-m" if $use_mpa_style; push @flags, "-z" if $report_zero_counts; push @flags, "-p", $threads; exec $DUMP_TABLE, @flags; die "$PROG: exec error: $!\n"; sub usage { my $exit_code = @_ ? shift : 64; my $default_db = "none"; eval { $default_db = '"' . kraken2lib::find_db() . '"'; }; print STDERR < # # This file is part of the Kraken 2 taxonomic sequence classification system. # Common subroutines for other Kraken scripts use strict; use warnings; # Input: the argument for a --db option (possibly undefined) # Returns: the DB to use, taking KRAKEN2_DEFAULT_DB and KRAKEN2_DB_PATH # into account. sub find_db { my $supplied_db_prefix = shift; my $db_prefix; if (! defined $supplied_db_prefix) { if (! exists $ENV{"KRAKEN2_DEFAULT_DB"}) { die "Must specify DB with either --db or \$KRAKEN2_DEFAULT_DB\n"; } $supplied_db_prefix = $ENV{"KRAKEN2_DEFAULT_DB"}; } my @db_path = ("."); if (exists $ENV{"KRAKEN2_DB_PATH"}) { my $path_str = $ENV{"KRAKEN2_DB_PATH"}; # Allow zero-length path to be current dir $path_str =~ s/^:/.:/; $path_str =~ s/:$/:./; $path_str =~ s/::/:.:/; @db_path = split /:/, $path_str; } # Use supplied DB if abs. or rel. path is given if ($supplied_db_prefix =~ m|/|) { $db_prefix = $supplied_db_prefix; } else { # Check all dirs in KRAKEN2_DB_PATH for my $dir (@db_path) { my $checked_db = "$dir/$supplied_db_prefix"; if (-e $checked_db && -d _) { $db_prefix = $checked_db; last; } } if (! defined $db_prefix) { my $printed_path = exists $ENV{"KRAKEN2_DB_PATH"} ? qq|"$ENV{'KRAKEN2_DB_PATH'}"| : "undefined"; die "unable to find $supplied_db_prefix in \$KRAKEN2_DB_PATH ($printed_path)\n"; } } for my $file (qw/taxo.k2d hash.k2d opts.k2d/) { if (! -e "$db_prefix/$file") { die "database (\"$db_prefix\") does not contain necessary file $file\n"; } } return $db_prefix; } # Input: a FASTA sequence ID # Output: either (a) a taxonomy ID number found in the sequence ID, # (b) an NCBI accession number found in the sequence ID, or undef sub check_seqid { my $seqid = shift; my $taxid = undef; # Note all regexes here use ?: to avoid capturing the ^ or | character if ($seqid =~ /(?:^|\|)kraken:taxid\|(\d+)/) { $taxid = $1; # OK, has explicit taxid } elsif ($seqid =~ /^(\d+)$/) { $taxid = $1; # OK, has explicit taxid (w/o token) } # Accession number check elsif ($seqid =~ /(?:^|\|) # Begins seqid or immediately follows pipe ([A-Z]+ # Starts with one or more UC alphas _? # Might have an underscore next [A-Z0-9]+) # Ends with UC alphas or digits (?:\||\b|\.)/x # Followed by pipe, word boundary, or period ) { $taxid = $1; # A bit misleading - failure to pass /^\d+$/ means this is # OK, but requires accession -> taxid mapping } return $taxid; } 1; kraken2-2.1.2/scripts/lookup_accession_numbers.pl000077500000000000000000000033641404614725400222100ustar00rootroot00000000000000#!/usr/bin/env perl # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Looks up accession numbers and reports associated taxonomy IDs # # Input is (a) 1 2-column TSV file w/ sequence IDs and accession numbers, # and (b) a list of accession2taxid files from NCBI. # Output is tab-delimited lines, with sequence IDs in first # column and taxonomy IDs in second. use strict; use warnings; use File::Basename; use Getopt::Std; my $PROG = basename $0; my $lookup_list_file = shift @ARGV; my %target_lists; open FILE, "<", $lookup_list_file or die "$PROG: can't open $lookup_list_file: $!\n"; while () { chomp; my ($seqid, $accnum) = split /\t/; $target_lists{$accnum} ||= []; push @{ $target_lists{$accnum} }, $seqid; } close FILE; my $initial_target_count = scalar keys %target_lists; my @accession_map_files = @ARGV; for my $file (@accession_map_files) { open FILE, "<", $file or die "$PROG: can't open $file: $!\n"; scalar(); # discard header line while () { chomp; my ($accession, $with_version, $taxid, $gi) = split /\t/; if ($target_lists{$accession}) { my @list = @{ $target_lists{$accession} }; delete $target_lists{$accession}; for my $seqid (@list) { print "$seqid\t$taxid\n"; } last if ! %target_lists; } } close FILE; last if ! %target_lists; } if (%target_lists) { warn "$PROG: @{[scalar keys %target_lists]}/$initial_target_count accession numbers remain unmapped, see unmapped.txt in DB directory\n"; open LIST, ">", "unmapped.txt" or die "$PROG: can't write unmapped.txt: $!\n"; for my $target (keys %target_lists) { print LIST "$target\n"; } close LIST; } kraken2-2.1.2/scripts/make_seqid2taxid_map.pl000077500000000000000000000074531404614725400211730ustar00rootroot00000000000000#!/usr/bin/env perl # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Reads multi-FASTA input and examines each sequence header. In quiet mode # (-q, used as an initial check on file validity), headers are OK if a # taxonomy ID is found (as either the entire sequence ID or as part of a # "kraken:taxid" token), or if something looking like a GI or accession # number is found. In normal mode, the taxonomy ID will be looked up (if # not explicitly specified in the sequence ID) and reported if it can be # found. Output is tab-delimited lines, with sequence IDs in first # column and taxonomy IDs in second. # Sequence IDs with a kraken:taxid token will use that to assign taxonomy # ID, e.g.: # >gi|32499|ref|NC_021949.2|kraken:taxid|562| # # Sequence IDs that are completely numeric are assumed to be the taxonomy # ID for that sequence. # # Otherwise, an accession number is searched for; if not found, a GI # number is searched for. Failure to find any of the above is a fatal error. # Without -q, a comma-separated file list specified by -A (for both accession # numbers and GI numbers) is examined; failure to find a # taxonomy ID that maps to a provided accession/GI number is non-fatal and # will emit a warning. # # With -q, does not print any output, and will die w/ nonzero exit instead # of warning when unable to find a taxid, accession #, or GI #. use strict; use warnings; use File::Basename; use Getopt::Std; my $PROG = basename $0; getopts('qA:L:', \my %opts); if ($opts{'q'} && (defined $opts{"A"} || defined $opts{"L"})) { die "$PROG: -q doesn't allow for -A/-L\n"; } my %target_lists; while (<>) { next unless /^>(\S+)/; my $seq_id = $1; my $output; # Note all regexes here use ?: to avoid capturing the ^ or | character if ($seq_id =~ /(?:^|\|)kraken:taxid\|(\d+)/) { $output = "$seq_id\t$1\n"; } elsif ($seq_id =~ /^\d+$/) { $output = "$seq_id\t$seq_id\n"; } # Accession regex is first, only puts number (not version) into $1 elsif ($seq_id =~ /(?:^|\|)([A-Z]+_?[A-Z0-9]+)(?:\||\b|\.)/ || $seq_id =~ /(?:^|\|)gi\|(\d+)/) { if (! $opts{"q"}) { $target_lists{$1} ||= []; push @{ $target_lists{$1} }, $seq_id; } } else { die "$PROG: unable to determine taxonomy ID for sequence $seq_id\n"; } if (defined $output && ! $opts{"q"}) { print $output; } } if ($opts{"q"}) { if (keys %target_lists) { print "$PROG: requires external map\n"; } exit 0; } exit 0 if ! keys %target_lists; if (! (defined $opts{"A"} || defined $opts{"L"})) { die "$PROG: found sequence ID without explicit taxonomy ID, but no map used\n"; } # Remove targets where we've already handled the mapping if (defined $opts{"L"}) { my $library_map_file = $opts{"L"}; open FILE, "<", $library_map_file or die "$PROG: can't open $library_map_file: $!\n"; while () { chomp; my ($seqid, $taxid) = split /\t/; if (exists $target_lists{$seqid}) { print "$seqid\t$taxid\n"; delete $target_lists{$seqid}; } } close FILE; } exit 0 if ! keys %target_lists; my @accession_map_files = split /,/, $opts{"A"}; for my $file (@accession_map_files) { open FILE, "<", $file or die "$PROG: can't open $file: $!\n"; scalar(); # discard header line while () { chomp; my ($accession, $with_version, $taxid, $gi) = split /\t/; if ($target_lists{$accession}) { my @list = @{ $target_lists{$accession} }; delete $target_lists{$accession}; for my $seqid (@list) { print "$seqid\t$taxid\n"; } } if ($gi ne "na" && $target_lists{$gi}) { my @list = @{ $target_lists{$gi} }; delete $target_lists{$gi}; for my $seqid (@list) { print "$seqid\t$taxid\n"; } } } close FILE; } kraken2-2.1.2/scripts/mask_low_complexity.sh000077500000000000000000000022751404614725400212050ustar00rootroot00000000000000#!/bin/bash # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Masks low complexity sequences in the database using the dustmasker (nucl.) # or segmasker (prot.) programs from NCBI. set -u # Protect against uninitialized vars. set -e # Stop on error set -o pipefail # Stop on failures in non-final pipeline commands target="$1" MASKER="dustmasker" if [ -n "$KRAKEN2_PROTEIN_DB" ]; then MASKER="segmasker" fi if ! which $MASKER > /dev/null; then echo "Unable to find $MASKER in path, can't mask low-complexity sequences" exit 1 fi if [ -d $target ]; then for file in $(find $target '(' -name '*.fna' -o -name '*.faa' ')'); do if [ ! -e "$file.masked" ]; then $MASKER -in $file -outfmt fasta | sed -e '/^>/!s/[a-z]/x/g' > "$file.tmp" mv "$file.tmp" $file touch "$file.masked" fi done elif [ -f $target ]; then if [ ! -e "$target.masked" ]; then $MASKER -in $target -outfmt fasta | sed -e '/^>/!s/[a-z]/x/g' > "$target.tmp" mv "$target.tmp" $target touch "$target.masked" fi else echo "Target $target must be directory or regular file, aborting masking" exit 1 fi kraken2-2.1.2/scripts/rsync_from_ncbi.pl000077500000000000000000000143731404614725400202730ustar00rootroot00000000000000#!/usr/bin/env perl # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Reads an assembly_summary.txt file, which indicates taxids and FTP paths for # genome/protein data. Performs the download of the complete genomes from # that file, decompresses, and explicitly assigns taxonomy as needed. use strict; use warnings; use File::Basename; use Getopt::Std; use Net::FTP; use List::Util qw/max/; my $PROG = basename $0; my $SERVER = "ftp.ncbi.nlm.nih.gov"; my $SERVER_PATH = "/genomes"; my $FTP_USER = "anonymous"; my $FTP_PASS = "kraken2download"; my $qm_server = quotemeta $SERVER; my $qm_server_path = quotemeta $SERVER_PATH; my $is_protein = $ENV{"KRAKEN2_PROTEIN_DB"}; my $use_ftp = $ENV{"KRAKEN2_USE_FTP"}; my $suffix = $is_protein ? "_protein.faa.gz" : "_genomic.fna.gz"; # Manifest hash maps filenames (keys) to taxids (values) my %manifest; while (<>) { next if /^#/; chomp; my @fields = split /\t/; my ($taxid, $asm_level, $ftp_path) = @fields[5, 11, 19]; # Possible TODO - make the list here configurable by user-supplied flags next unless grep {$asm_level eq $_} ("Complete Genome", "Chromosome"); next if $ftp_path eq "na"; # Skip if no provided path my $full_path = $ftp_path . "/" . basename($ftp_path) . $suffix; # strip off server/leading dir name to allow --files-from= to work w/ rsync # also allows filenames to just start with "all/", which is nice if (! ($full_path =~ s#^ftp://${qm_server}${qm_server_path}/##)) { die "$PROG: unexpected FTP path (new server?) for $ftp_path\n"; } $manifest{$full_path} = $taxid; } open MANIFEST, ">", "manifest.txt" or die "$PROG: can't write manifest: $!\n"; print MANIFEST "$_\n" for keys %manifest; close MANIFEST; if ($is_protein && ! $use_ftp) { print STDERR "Step 0/2: performing rsync dry run (only protein d/l requires this)...\n"; # Protein files aren't always present, so we have to do this two-rsync run hack # First, do a dry run to find non-existent files, then delete them from the # manifest; after this, execution can proceed as usual. system("rsync --dry-run --no-motd --files-from=manifest.txt rsync://${SERVER}${SERVER_PATH} . 2> rsync.err"); open ERR_FILE, "<", "rsync.err" or die "$PROG: can't read rsync.err file: $!\n"; while () { chomp; # I really doubt this will work across every version of rsync. :( if (/failed: No such file or directory/ && /^rsync: link_stat "\/([^"]+)"/) { delete $manifest{$1}; } } close ERR_FILE; print STDERR "Rsync dry run complete, removing any non-existent files from manifest.\n"; # Rewrite manifest open MANIFEST, ">", "manifest.txt" or die "$PROG: can't write manifest: $!\n"; print MANIFEST "$_\n" for keys %manifest; close MANIFEST; } sub ftp_connection { my $ftp = Net::FTP->new($SERVER, Passive => 1) or die "$PROG: FTP connection error: $@\n"; $ftp->login($FTP_USER, $FTP_PASS) or die "$PROG: FTP login error: " . $ftp->message() . "\n"; $ftp->binary() or die "$PROG: FTP binary mode error: " . $ftp->message() . "\n"; $ftp->cwd($SERVER_PATH) or die "$PROG: FTP CD error: " . $ftp->message() . "\n"; return $ftp; } if ($use_ftp) { print STDERR "Step 1/2: Performing ftp file transfer of requested files\n"; open MANIFEST, "<", "manifest.txt" or die "$PROG: can't open manifest: $!\n"; mkdir "all" or die "$PROG: can't create 'all' directory: $!\n"; chdir "all" or die "$PROG: can't chdir into 'all' directory: $!\n"; while () { chomp; my $ftp = ftp_connection(); my $try = 0; my $ntries = 5; my $sleepsecs = 3; while($try < $ntries) { $try++; last if $ftp->get($_); warn "$PROG: unable to download $_ on try $try of $ntries: ".$ftp->message()."\n"; last if $try == $ntries; sleep $sleepsecs; $sleepsecs *= 3; } die "$PROG: unable to download ftp://${SERVER}${SERVER_PATH}/$_\n" if $try == $ntries; $ftp->quit; } close MANIFEST; chdir ".." or die "$PROG: can't return to correct directory: $!\n"; } else { print STDERR "Step 1/2: Performing rsync file transfer of requested files\n"; system("rsync --no-motd --files-from=manifest.txt rsync://${SERVER}${SERVER_PATH}/ .") == 0 or die "$PROG: rsync error, exiting: $?\n"; print STDERR "Rsync file transfer complete.\n"; } print STDERR "Step 2/2: Assigning taxonomic IDs to sequences\n"; my $output_file = $is_protein ? "library.faa" : "library.fna"; open OUT, ">", $output_file or die "$PROG: can't write $output_file: $!\n"; my $projects_added = 0; my $sequences_added = 0; my $ch_added = 0; my $ch = $is_protein ? "aa" : "bp"; my $max_out_chars = 0; for my $in_filename (keys %manifest) { my $taxid = $manifest{$in_filename}; if ($use_ftp) { # FTP downloading doesn't create full path locally $in_filename = "all/" . basename($in_filename); } open IN, "gunzip -c $in_filename |" or die "$PROG: can't read $in_filename: $!\n"; while () { if (/^>/) { s/^>/>kraken:taxid|$taxid|/; $sequences_added++; } else { $ch_added += length($_) - 1; } print OUT; } close IN; unlink $in_filename; $projects_added++; my $out_line = progress_line($projects_added, scalar keys %manifest, $sequences_added, $ch_added) . "..."; $max_out_chars = max(length($out_line), $max_out_chars); my $space_line = " " x $max_out_chars; print STDERR "\r$space_line\r$out_line" if -t STDERR; } close OUT; print STDERR " done.\n" if -t STDERR; print STDERR "All files processed, cleaning up extra sequence files..."; system("rm -rf all/") == 0 or die "$PROG: can't clean up all/ directory: $?\n"; print STDERR " done, library complete.\n"; sub progress_line { my ($projs, $total_projs, $seqs, $chs) = @_; my $line = "Processed "; $line .= ($projs == $total_projs) ? "$projs" : "$projs/$total_projs"; $line .= " project" . ($total_projs > 1 ? 's' : '') . " "; $line .= "($seqs sequence" . ($seqs > 1 ? 's' : '') . ", "; my $prefix; my @prefixes = qw/k M G T P E/; while (@prefixes && $chs >= 1000) { $prefix = shift @prefixes; $chs /= 1000; } if (defined $prefix) { $line .= sprintf '%.2f %s%s)', $chs, $prefix, $ch; } else { $line .= "$chs $ch)"; } return $line; } kraken2-2.1.2/scripts/scan_fasta_file.pl000077500000000000000000000031371404614725400202140ustar00rootroot00000000000000#!/usr/bin/env perl # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Reads multi-FASTA input and examines each sequence header. Headers are # OK if a taxonomy ID is found (as either the entire sequence ID or as part # of a # "kraken:taxid" token), or if something looking like an accession # number is found. Not "OK" headers will are fatal errors unless "--lenient" # is used. # # Each sequence header results in a line with three tab-separated values; # the first indicating whether third column is the taxonomy ID ("TAXID") or # an accession number ("ACCNUM") for the sequence ID listed in the second # column. use strict; use warnings; use File::Basename; use Getopt::Long; require "$ENV{KRAKEN2_DIR}/kraken2lib.pm"; my $PROG = basename $0; my $lenient = 0; GetOptions("lenient" => \$lenient) or die "Usage: $PROG [--lenient] \n"; while (<>) { next unless /^>/; # while (/.../g) needed because non-redundant DBs sometimes have multiple # sequence IDs in the header; extra sequence IDs are prefixed by # '\x01' characters (if downloaded in FASTA format from NCBI FTP directly). while (/(?:^>|\x01)(\S+)/g) { my $seqid = $1; my $taxid = kraken2lib::check_seqid($seqid); if (! defined $taxid) { next if $lenient; die "$PROG: unable to determine taxonomy ID for sequence $seqid\n"; } # "$taxid" may actually be an accession number if ($taxid =~ /^\d+$/) { print "TAXID\t$seqid\t$taxid\n"; } else { print "ACCNUM\t$seqid\t$taxid\n"; } } } kraken2-2.1.2/scripts/standard_installation.sh000077500000000000000000000030741404614725400214730ustar00rootroot00000000000000#!/bin/bash # Copyright 2013-2021, Derrick Wood # # This file is part of the Kraken 2 taxonomic sequence classification system. # Build the standard Kraken database # Designed to be called by kraken_build set -u # Protect against uninitialized vars. set -e # Stop on error set -o pipefail # Stop on failures in non-final pipeline commands protein_flag="" if [ -n "$KRAKEN2_PROTEIN_DB" ]; then protein_flag="--protein" fi masking_flag="" if [ -z "$KRAKEN2_MASK_LC" ]; then masking_flag="--no-mask" fi ftp_flag="" if [ -n "$KRAKEN2_USE_FTP" ]; then ftp_flag="--use-ftp" fi kraken2-build --db $KRAKEN2_DB_NAME --download-taxonomy $masking_flag $protein_flag $ftp_flag kraken2-build --db $KRAKEN2_DB_NAME --download-library archaea $masking_flag $protein_flag $ftp_flag kraken2-build --db $KRAKEN2_DB_NAME --download-library bacteria $masking_flag $protein_flag $ftp_flag kraken2-build --db $KRAKEN2_DB_NAME --download-library viral $masking_flag $protein_flag $ftp_flag kraken2-build --db $KRAKEN2_DB_NAME --download-library plasmid $masking_flag $protein_flag $ftp_flag kraken2-build --db $KRAKEN2_DB_NAME --download-library human --no-mask $protein_flag $ftp_flag if [ -z "$KRAKEN2_PROTEIN_DB" ]; then kraken2-build --db $KRAKEN2_DB_NAME --download-library UniVec_Core $masking_flag $ftp_flag fi kraken2-build --db $KRAKEN2_DB_NAME --build --threads $KRAKEN2_THREAD_CT \ --minimizer-len $KRAKEN2_MINIMIZER_LEN \ --kmer-len $KRAKEN2_KMER_LEN \ --minimizer-spaces $KRAKEN2_MINIMIZER_SPACES \ $protein_flag kraken2-2.1.2/src/000077500000000000000000000000001404614725400136475ustar00rootroot00000000000000kraken2-2.1.2/src/CMakeLists.txt000066400000000000000000000016351404614725400164140ustar00rootroot00000000000000add_executable(build_db build_db.cc mmap_file.cc compact_hash.cc taxonomy.cc seqreader.cc mmscanner.cc omp_hack.cc utilities.cc) add_executable(classify classify.cc reports.cc mmap_file.cc compact_hash.cc taxonomy.cc seqreader.cc mmscanner.cc omp_hack.cc aa_translate.cc utilities.cc hyperloglogplus.cc) add_executable(estimate_capacity estimate_capacity.cc seqreader.cc mmscanner.cc omp_hack.cc utilities.cc) add_executable(dump_table dump_table.cc mmap_file.cc compact_hash.cc omp_hack.cc taxonomy.cc reports.cc hyperloglogplus.cc) add_executable(lookup_accession_numbers lookup_accession_numbers.cc mmap_file.cc omp_hack.cc utilities.cc) kraken2-2.1.2/src/Makefile000066400000000000000000000036431404614725400153150ustar00rootroot00000000000000CXX = g++ CXXFLAGS = -fopenmp -Wall -std=c++11 -O3 CXXFLAGS += -DLINEAR_PROBING .PHONY: all clean install PROGS = estimate_capacity build_db classify dump_table lookup_accession_numbers all: $(PROGS) install: $(PROGS) cp $(PROGS) "$(KRAKEN2_DIR)/" clean: rm -f *.o $(PROGS) taxonomy.o: taxonomy.cc taxonomy.h mmap_file.h hyperloglogplus.o: hyperloglogplus.cc hyperloglogplus.h mmap_file.o: mmap_file.cc mmap_file.h compact_hash.o: compact_hash.cc compact_hash.h kv_store.h mmap_file.h kraken2_data.h mmscanner.o: mmscanner.cc mmscanner.h seqreader.o: seqreader.cc seqreader.h omp_hack.o: omp_hack.cc omp_hack.h reports.o: reports.cc reports.h kraken2_data.h aa_translate.o: aa_translate.cc aa_translate.h utilities.o: utilities.cc utilities.h classify.o: classify.cc kraken2_data.h kv_store.h taxonomy.h seqreader.h mmscanner.h compact_hash.h aa_translate.h reports.h utilities.h readcounts.h dump_table.o: dump_table.cc compact_hash.h taxonomy.h mmscanner.h kraken2_data.h reports.h estimate_capacity.o: estimate_capacity.cc kv_store.h mmscanner.h seqreader.h utilities.h build_db.o: build_db.cc taxonomy.h mmscanner.h seqreader.h compact_hash.h kv_store.h kraken2_data.h utilities.h lookup_accession_numbers.o: lookup_accession_numbers.cc mmap_file.h utilities.h build_db: build_db.o mmap_file.o compact_hash.o taxonomy.o seqreader.o mmscanner.o omp_hack.o utilities.o $(CXX) $(CXXFLAGS) -o $@ $^ classify: classify.o reports.o hyperloglogplus.o mmap_file.o compact_hash.o taxonomy.o seqreader.o mmscanner.o omp_hack.o aa_translate.o utilities.o $(CXX) $(CXXFLAGS) -o $@ $^ estimate_capacity: estimate_capacity.o seqreader.o mmscanner.o omp_hack.o utilities.o $(CXX) $(CXXFLAGS) -o $@ $^ dump_table: dump_table.o mmap_file.o compact_hash.o omp_hack.o taxonomy.o reports.o hyperloglogplus.o $(CXX) $(CXXFLAGS) -o $@ $^ lookup_accession_numbers: lookup_accession_numbers.o mmap_file.o omp_hack.o utilities.o $(CXX) $(CXXFLAGS) -o $@ $^ kraken2-2.1.2/src/aa_translate.cc000066400000000000000000000075201404614725400166200ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #include "aa_translate.h" using std::string; using std::vector; namespace kraken2 { /* This *was* the map, which was ridiculously slow. Note that ordering is AGCT, not ACGT. static map old_translation_map = { { "AAA", 'K' }, { "AAG", 'K' }, { "AAC", 'N' }, { "AAT", 'N' }, { "AGA", 'R' }, { "AGG", 'R' }, { "AGC", 'S' }, { "AGT", 'S' }, { "ACA", 'T' }, { "ACG", 'T' }, { "ACC", 'T' }, { "ACT", 'T' }, { "ATA", 'I' }, { "ATG", 'M' }, { "ATC", 'I' }, { "ATT", 'I' }, { "GAA", 'E' }, { "GAG", 'E' }, { "GAC", 'D' }, { "GAT", 'D' }, { "GGA", 'G' }, { "GGG", 'G' }, { "GGC", 'G' }, { "GGT", 'G' }, { "GCA", 'A' }, { "GCG", 'A' }, { "GCC", 'A' }, { "GCT", 'A' }, { "GTA", 'V' }, { "GTG", 'V' }, { "GTC", 'V' }, { "GTT", 'V' }, { "CAA", 'Q' }, { "CAG", 'Q' }, { "CAC", 'H' }, { "CAT", 'H' }, { "CGA", 'R' }, { "CGG", 'R' }, { "CGC", 'R' }, { "CGT", 'R' }, { "CCA", 'P' }, { "CCG", 'P' }, { "CCC", 'P' }, { "CCT", 'P' }, { "CTA", 'L' }, { "CTG", 'L' }, { "CTC", 'L' }, { "CTT", 'L' }, { "TAA", '*' }, { "TAG", '*' }, { "TAC", 'Y' }, { "TAT", 'Y' }, { "TGA", '*' }, { "TGG", 'W' }, { "TGC", 'C' }, { "TGT", 'C' }, { "TCA", 'S' }, { "TCG", 'S' }, { "TCC", 'S' }, { "TCT", 'S' }, { "TTA", 'L' }, { "TTG", 'L' }, { "TTC", 'F' }, { "TTT", 'F' } }; */ static char translation_map[] = "KKNNRRSSTTTTIMIIEEDDGGGGAAAAVVVVQQHHRRRRPPPPLLLL**YY*WCCSSSSLLFF"; static uint8_t fwd_lookup_table[UINT8_MAX + 1] = {0}; static uint8_t rev_lookup_table[UINT8_MAX + 1] = {0}; void TranslateToAllFrames(string &dna_seq, vector &aa_seqs) { auto max_size = (dna_seq.size() / 3) + 1; for (auto i = 0; i < 6; i++) aa_seqs[i].assign(max_size, ' '); if (dna_seq.size() < 3) return; if (fwd_lookup_table[0] == 0) { for (size_t i = 0; i <= UINT8_MAX; i++) fwd_lookup_table[i] = rev_lookup_table[i] = UINT8_MAX; // Map is based on AGCT coding, not ACGT fwd_lookup_table[(int) 'A'] = fwd_lookup_table[(int) 'a'] = 0x00; fwd_lookup_table[(int) 'G'] = fwd_lookup_table[(int) 'g'] = 0x01; fwd_lookup_table[(int) 'C'] = fwd_lookup_table[(int) 'c'] = 0x02; fwd_lookup_table[(int) 'T'] = fwd_lookup_table[(int) 't'] = 0x03; rev_lookup_table[(int) 'A'] = rev_lookup_table[(int) 'a'] = 0x30; rev_lookup_table[(int) 'G'] = rev_lookup_table[(int) 'g'] = 0x20; rev_lookup_table[(int) 'C'] = rev_lookup_table[(int) 'c'] = 0x10; rev_lookup_table[(int) 'T'] = rev_lookup_table[(int) 't'] = 0x00; } uint8_t fwd_codon = 0, rev_codon = 0; int ambig_nt_countdown = 0; // if positive, bases to go until N leaves codon size_t frame_len[6] = {0}; for (auto i = 0u; i < dna_seq.size(); i++) { auto frame = i % 3; fwd_codon <<= 2; fwd_codon &= 0x3f; rev_codon >>= 2; if (ambig_nt_countdown) ambig_nt_countdown--; auto fwd_lookup_code = fwd_lookup_table[(int) dna_seq[i]]; auto rev_lookup_code = rev_lookup_table[(int) dna_seq[i]]; if (fwd_lookup_code == UINT8_MAX) ambig_nt_countdown = 3; else { fwd_codon |= fwd_lookup_code; rev_codon |= rev_lookup_code; } if (i >= 2) { // we've got a full codon char ch; // Translate and append to forward frame ch = ambig_nt_countdown == 0 ? translation_map[fwd_codon] : 'X'; aa_seqs[frame][frame_len[frame]++] = ch; // Translate and prepend to reverse frame ch = ambig_nt_countdown == 0 ? translation_map[rev_codon] : 'X'; aa_seqs[frame + 3][max_size - 1 - frame_len[frame + 3]++] = ch; } } for (auto i = 0; i < 3; i++) if (aa_seqs[i].size() != frame_len[i]) aa_seqs[i].resize(frame_len[i]); for (auto i = 3; i < 6; i++) if (aa_seqs[i].size() != frame_len[i]) aa_seqs[i].erase(0, aa_seqs[i].size() - frame_len[i]); } } kraken2-2.1.2/src/aa_translate.h000066400000000000000000000005451404614725400164620ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #ifndef KRAKEN2_AA_TRANSLATE_H_ #define KRAKEN2_AA_TRANSLATE_H_ #include "kraken2_headers.h" namespace kraken2 { void TranslateToAllFrames(std::string &dna_seq, std::vector &aa_seqs); } #endif kraken2-2.1.2/src/build_db.cc000066400000000000000000000506541404614725400157340ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #include "kraken2_headers.h" #include "taxonomy.h" #include "mmscanner.h" #include "seqreader.h" #include "compact_hash.h" #include "kv_store.h" #include "kraken2_data.h" #include "utilities.h" using std::string; using std::map; using std::cout; using std::queue; using std::cerr; using std::endl; using std::vector; using std::istringstream; using std::ifstream; using std::ofstream; using namespace kraken2; // These will likely be overridden by wrapper script, // just keeping sane defaults in case they aren't #define DEFAULT_BLOCK_SIZE (10 * 1024 * 1024) // 10 MB #define DEFAULT_SUBBLOCK_SIZE (1024) struct Options { string ID_to_taxon_map_filename; string ncbi_taxonomy_directory; string hashtable_filename; string options_filename; string taxonomy_filename; size_t block_size; size_t subblock_size; size_t requested_bits_for_taxid; int num_threads; bool input_is_protein; ssize_t k, l; size_t capacity; size_t maximum_capacity; uint64_t spaced_seed_mask; uint64_t toggle_mask; uint64_t min_clear_hash_value; bool deterministic_build; }; void ParseCommandLine(int argc, char **argv, Options &opts); void usage(int exit_code = EX_USAGE); vector ExtractNCBISequenceIDs(const string &header); void ProcessSequenceFast(const string &seq, taxid_t taxid, CompactHashTable &hash, const Taxonomy &tax, MinimizerScanner &scanner, uint64_t min_clear_hash_value); void ProcessSequence(const Options &opts, const string &seq, taxid_t taxid, CompactHashTable &hash, const Taxonomy &tax); void ProcessSequencesFast(const Options &opts, const map &ID_to_taxon_map, CompactHashTable &kraken_index, const Taxonomy &taxonomy); void ProcessSequences(const Options &opts, const map &ID_to_taxon_map, CompactHashTable &kraken_index, const Taxonomy &taxonomy); void SetMinimizerLCA(CompactHashTable &hash, uint64_t minimizer, taxid_t taxid, const Taxonomy &tax); void ReadIDToTaxonMap(map &id_map, string &filename); void GenerateTaxonomy(Options &opts, map &id_map); struct TaxonSeqPair { taxid_t taxon; string seq; }; int main(int argc, char **argv) { Options opts; opts.spaced_seed_mask = DEFAULT_SPACED_SEED_MASK; opts.toggle_mask = DEFAULT_TOGGLE_MASK; opts.input_is_protein = false; opts.num_threads = 1; opts.block_size = DEFAULT_BLOCK_SIZE; opts.subblock_size = DEFAULT_SUBBLOCK_SIZE; opts.requested_bits_for_taxid = 0; opts.min_clear_hash_value = 0; opts.maximum_capacity = 0; opts.deterministic_build = true; ParseCommandLine(argc, argv, opts); omp_set_num_threads( opts.num_threads ); map ID_to_taxon_map; ReadIDToTaxonMap(ID_to_taxon_map, opts.ID_to_taxon_map_filename); GenerateTaxonomy(opts, ID_to_taxon_map); std::cerr << "Taxonomy parsed and converted." << std::endl; Taxonomy taxonomy(opts.taxonomy_filename.c_str()); taxonomy.GenerateExternalToInternalIDMap(); size_t bits_needed_for_value = 1; while ((1 << bits_needed_for_value) < (ssize_t) taxonomy.node_count()) bits_needed_for_value++; if (opts.requested_bits_for_taxid > 0 && bits_needed_for_value > opts.requested_bits_for_taxid) errx(EX_DATAERR, "more bits required for storing taxid"); size_t bits_for_taxid = bits_needed_for_value; if (bits_for_taxid < opts.requested_bits_for_taxid) bits_for_taxid = opts.requested_bits_for_taxid; auto actual_capacity = opts.capacity; if (opts.maximum_capacity) { double frac = opts.maximum_capacity * 1.0 / opts.capacity; if (frac > 1) errx(EX_DATAERR, "maximum capacity larger than requested capacity"); opts.min_clear_hash_value = (uint64_t) ((1 - frac) * UINT64_MAX); actual_capacity = opts.maximum_capacity; } CompactHashTable kraken_index(actual_capacity, 32 - bits_for_taxid, bits_for_taxid); std::cerr << "CHT created with " << bits_for_taxid << " bits reserved for taxid." << std::endl; if (opts.deterministic_build) ProcessSequences(opts, ID_to_taxon_map, kraken_index, taxonomy); else ProcessSequencesFast(opts, ID_to_taxon_map, kraken_index, taxonomy); std::cerr << "Writing data to disk... " << std::flush; kraken_index.WriteTable(opts.hashtable_filename.c_str()); IndexOptions index_opts; index_opts.k = opts.k; index_opts.l = opts.l; index_opts.spaced_seed_mask = opts.spaced_seed_mask; index_opts.toggle_mask = opts.toggle_mask; index_opts.dna_db = ! opts.input_is_protein; index_opts.minimum_acceptable_hash_value = opts.min_clear_hash_value; index_opts.revcom_version = CURRENT_REVCOM_VERSION; index_opts.db_version = 0; index_opts.db_type = 0; ofstream opts_fs(opts.options_filename); opts_fs.write((char *) &index_opts, sizeof(index_opts)); if (! opts_fs.good()) errx(EX_OSERR, "Unable to write options file %s", opts.options_filename.c_str()); opts_fs.close(); std::cerr << " complete." << std::endl; return 0; } // A quick but nondeterministic build void ProcessSequencesFast(const Options &opts, const map &ID_to_taxon_map, CompactHashTable &kraken_index, const Taxonomy &taxonomy) { size_t processed_seq_ct = 0; size_t processed_ch_ct = 0; #pragma omp parallel { Sequence sequence; MinimizerScanner scanner(opts.k, opts.l, opts.spaced_seed_mask, ! opts.input_is_protein, opts.toggle_mask); BatchSequenceReader reader; while (true) { // Declaration of "ok" and break need to be done outside of critical // section to conform with OpenMP spec. bool ok; #pragma omp critical(reader) ok = reader.LoadBlock(std::cin, opts.block_size); if (! ok) break; while (reader.NextSequence(sequence)) { auto all_sequence_ids = ExtractNCBISequenceIDs(sequence.header); taxid_t taxid = 0; for (auto &seqid : all_sequence_ids) { if (ID_to_taxon_map.count(seqid) == 0) continue; auto ext_taxid = ID_to_taxon_map.at(seqid); taxid = taxonomy.LowestCommonAncestor(taxid, taxonomy.GetInternalID(ext_taxid)); } if (taxid) { // Add terminator for protein sequences if not already there if (opts.input_is_protein && sequence.seq.back() != '*') sequence.seq.push_back('*'); ProcessSequenceFast(sequence.seq, taxid, kraken_index, taxonomy, scanner, opts.min_clear_hash_value); #pragma omp atomic processed_seq_ct++; #pragma omp atomic processed_ch_ct += sequence.seq.size(); } } if (isatty(fileno(stderr))) { #pragma omp critical(status_update) std::cerr << "\rProcessed " << processed_seq_ct << " sequences (" << processed_ch_ct << " " << (opts.input_is_protein ? "aa" : "bp") << ")..."; } } } if (isatty(fileno(stderr))) std::cerr << "\r"; std::cerr << "Completed processing of " << processed_seq_ct << " sequences, " << processed_ch_ct << " " << (opts.input_is_protein ? "aa" : "bp") << std::endl; } // Slightly slower but deterministic when multithreaded void ProcessSequences(const Options &opts, const map &ID_to_taxon_map, CompactHashTable &kraken_index, const Taxonomy &taxonomy) { size_t processed_seq_ct = 0; size_t processed_ch_ct = 0; Sequence sequence; BatchSequenceReader reader; while (reader.LoadBlock(std::cin, DEFAULT_BLOCK_SIZE)) { while (reader.NextSequence(sequence)) { auto all_sequence_ids = ExtractNCBISequenceIDs(sequence.header); taxid_t taxid = 0; for (auto &seqid : all_sequence_ids) { if (ID_to_taxon_map.count(seqid) == 0) continue; auto ext_taxid = ID_to_taxon_map.at(seqid); taxid = taxonomy.LowestCommonAncestor(taxid, taxonomy.GetInternalID(ext_taxid)); } if (taxid) { // Add terminator for protein sequences if not already there if (opts.input_is_protein && sequence.seq.back() != '*') sequence.seq.push_back('*'); ProcessSequence(opts, sequence.seq, taxid, kraken_index, taxonomy); processed_seq_ct++; processed_ch_ct += sequence.seq.size(); } } if (isatty(fileno(stderr))) { std::cerr << "\rProcessed " << processed_seq_ct << " sequences (" << processed_ch_ct << " " << (opts.input_is_protein ? "aa" : "bp") << ")..."; } } if (isatty(fileno(stderr))) std::cerr << "\r"; std::cerr << "Completed processing of " << processed_seq_ct << " sequences, " << processed_ch_ct << " " << (opts.input_is_protein ? "aa" : "bp") << std::endl; } // This function exists to deal with NCBI's use of \x01 characters to denote // the start of a new FASTA header in the same line (for non-redundant DBs). // We return all sequence IDs in a header line, not just the first. vector ExtractNCBISequenceIDs(const string &header) { vector list; string current_str; bool in_id = true; // start loop at first char after '>' for (size_t i = 1; i < header.size(); ++i) { if (header[i] == 0x01) { // 0x01 starts new ID at next char if (! current_str.empty()) list.push_back(current_str); current_str.clear(); in_id = true; } else if (in_id && isspace(header[i])) { // spaces end ID if (! current_str.empty()) list.push_back(current_str); current_str.clear(); in_id = false; } else if (in_id) { // Build ID string char by char current_str.push_back(header[i]); } } if (! current_str.empty()) list.push_back(current_str); return list; } void SetMinimizerLCA(CompactHashTable &hash, uint64_t minimizer, taxid_t taxid, const Taxonomy &tax) { hvalue_t old_value = 0; hvalue_t new_value = taxid; while (! hash.CompareAndSet(minimizer, new_value, &old_value)) new_value = tax.LowestCommonAncestor(old_value, taxid); } void ProcessSequenceFast(const string &seq, taxid_t taxid, CompactHashTable &hash, const Taxonomy &tax, MinimizerScanner &scanner, uint64_t min_clear_hash_value) { scanner.LoadSequence(seq); uint64_t *minimizer_ptr; while ((minimizer_ptr = scanner.NextMinimizer())) { if (scanner.is_ambiguous()) continue; if (min_clear_hash_value && MurmurHash3(*minimizer_ptr) < min_clear_hash_value) continue; hvalue_t existing_taxid = 0; hvalue_t new_taxid = taxid; while (! hash.CompareAndSet(*minimizer_ptr, new_taxid, &existing_taxid)) { new_taxid = tax.LowestCommonAncestor(new_taxid, existing_taxid); } } } void ProcessSequence(const Options &opts, const string &seq, taxid_t taxid, CompactHashTable &hash, const Taxonomy &tax) { const int set_ct = 256; omp_lock_t locks[set_ct]; for (int i = 0; i < set_ct; i++) omp_init_lock(&locks[i]); // for each block in the sequence for (size_t j = 0; j < seq.size(); j += opts.block_size) { // block: fixed length subsequence of DNA/protein, handled in series, // consecutive blocks overlap by k-1 characters size_t block_start = j; size_t block_finish = j + opts.block_size + opts.k - 1; if (block_finish > seq.size()) block_finish = seq.size(); std::set minimizer_sets[set_ct]; // for each subblock in the block, gather minimizers in parallel #pragma omp parallel for schedule(dynamic) for (size_t i = block_start; i < block_finish; i += opts.subblock_size) { // subblock: fixed length subsequence of block, handled in parallel, // consecutive subblocks overlap by k-1 characters MinimizerScanner scanner(opts.k, opts.l, opts.spaced_seed_mask, ! opts.input_is_protein, opts.toggle_mask); size_t subblock_finish = i + opts.subblock_size + opts.k - 1; if (subblock_finish > block_finish) subblock_finish = block_finish; scanner.LoadSequence(seq, i, subblock_finish); uint64_t *minimizer_ptr; while ((minimizer_ptr = scanner.NextMinimizer())) { if (scanner.is_ambiguous()) continue; auto hc = MurmurHash3(*minimizer_ptr); // Hash-based subsampling if (opts.min_clear_hash_value && hc < opts.min_clear_hash_value) continue; auto zone = hc % set_ct; omp_set_lock(&locks[zone]); minimizer_sets[zone].insert(*minimizer_ptr); omp_unset_lock(&locks[zone]); } } // end subblock for loop // combine sets into sorted list std::vector minimizer_lists[set_ct]; size_t minimizer_list_prefix_sizes[set_ct+1] = {0}; #pragma omp parallel for for (int i = 0; i < set_ct; i++) { minimizer_lists[i].reserve(minimizer_sets[i].size()); for (auto &m : minimizer_sets[i]) minimizer_lists[i].push_back(m); sort(minimizer_lists[i].begin(), minimizer_lists[i].end()); minimizer_list_prefix_sizes[i+1] = minimizer_lists[i].size(); } for (int i = 2; i <= set_ct; i++) minimizer_list_prefix_sizes[i] += minimizer_list_prefix_sizes[i-1]; std::vector minimizer_list(minimizer_list_prefix_sizes[set_ct]); #pragma omp parallel for for (int i = 0; i < set_ct; i++) { std::copy(minimizer_lists[i].begin(), minimizer_lists[i].end(), minimizer_list.begin() + minimizer_list_prefix_sizes[i]); } size_t mm_ct = minimizer_list.size(); std::vector index_list(mm_ct); std::vector insertion_list(mm_ct); for (size_t i = 0; i < mm_ct; i++) insertion_list[i] = true; // Loop to enforce deterministic order while (! minimizer_list.empty()) { // Gather insertion point information for all remaining minimizers #pragma omp parallel for for (size_t i = 0; i < mm_ct; i++) { // once we've determined that a minimizer won't be an insertion, // don't bother calling FindIndex() again if (insertion_list[i]) { size_t idx; insertion_list[i] = ! hash.FindIndex(minimizer_list[i], &idx); index_list[i] = idx; } } // Determine safe prefix of sorted set to insert in parallel std::set novel_insertion_points; size_t safe_ct = 0; for (safe_ct = 0; safe_ct < mm_ct; safe_ct++) { if (insertion_list[safe_ct]) { if (novel_insertion_points.count(index_list[safe_ct]) > 0) break; novel_insertion_points.insert(index_list[safe_ct]); } } // Adjust CHT values for all keys in the safe zone in parallel #pragma omp parallel for for (size_t i = 0; i < safe_ct; i++) { SetMinimizerLCA(hash, minimizer_list[i], taxid, tax); } // Remove safe prefix and re-iterate to process remainder minimizer_list.erase(minimizer_list.begin(), minimizer_list.begin() + safe_ct); index_list.erase(index_list.begin(), index_list.begin() + safe_ct); insertion_list.erase(insertion_list.begin(), insertion_list.begin() + safe_ct); mm_ct -= safe_ct; } // end deterministic order while loop } // end block for loop for (int i = 0; i < set_ct; i++) omp_destroy_lock(&locks[i]); } void ReadIDToTaxonMap(map &id_map, string &filename) { ifstream map_file(filename.c_str()); if (! map_file.good()) err(EX_NOINPUT, "unable to read from '%s'", filename.c_str()); string line; while (getline(map_file, line)) { string seq_id; taxid_t taxid; istringstream iss(line); iss >> seq_id; iss >> taxid; id_map[seq_id] = taxid; } } void GenerateTaxonomy(Options &opts, map &id_map) { NCBITaxonomy ncbi_taxonomy( opts.ncbi_taxonomy_directory + "/nodes.dmp", opts.ncbi_taxonomy_directory + "/names.dmp" ); for (auto &kv_pair : id_map) { if (kv_pair.second != 0) { ncbi_taxonomy.MarkNode(kv_pair.second); } } ncbi_taxonomy.ConvertToKrakenTaxonomy(opts.taxonomy_filename.c_str()); } void ParseCommandLine(int argc, char **argv, Options &opts) { int opt; long long sig; while ((opt = getopt(argc, argv, "?hB:b:c:FH:m:n:o:t:k:l:M:p:r:s:S:T:X")) != -1) { switch (opt) { case 'h' : case '?' : usage(0); break; case 'B' : sig = atoll(optarg); if (sig < 1) errx(EX_USAGE, "must have positive block size"); opts.block_size = sig; break; case 'b' : sig = atoll(optarg); if (sig < 1) errx(EX_USAGE, "must have positive subblock size"); opts.subblock_size = sig; break; case 'r' : sig = atoll(optarg); if (sig < 0) errx(EX_USAGE, "can't have negative bit storage"); if (sig > 31) errx(EX_USAGE, "can't have more than 31 bits of storage for taxid"); opts.requested_bits_for_taxid = sig; break; case 'p' : opts.num_threads = atoll(optarg); if (opts.num_threads < 1) errx(EX_USAGE, "can't have negative number of threads"); if (opts.num_threads > omp_get_max_threads()) errx(EX_USAGE, "OMP only wants you to use %d threads", omp_get_max_threads()); break; case 'H' : opts.hashtable_filename = optarg; break; case 'm' : opts.ID_to_taxon_map_filename = optarg; break; case 'n' : opts.ncbi_taxonomy_directory = optarg; break; case 'o' : opts.options_filename = optarg; break; case 't' : opts.taxonomy_filename = optarg; break; case 'S' : opts.spaced_seed_mask = strtol(optarg, nullptr, 2); break; case 'T' : opts.toggle_mask = strtol(optarg, nullptr, 2); break; case 'k' : sig = atoll(optarg); if (sig < 1) errx(EX_USAGE, "k must be positive integer"); opts.k = sig; break; case 'l' : sig = atoll(optarg); if (sig < 1) errx(EX_USAGE, "l must be positive integer"); if (sig > 31) errx(EX_USAGE, "l must be no more than 31"); opts.l = sig; break; case 'c' : sig = atoll(optarg); if (sig < 1) errx(EX_USAGE, "capacity must be positive integer"); opts.capacity = sig; break; case 'M' : sig = atoll(optarg); if (sig < 1) errx(EX_USAGE, "max capacity must be positive integer"); opts.maximum_capacity = sig; break; case 'F' : opts.deterministic_build = false; break; case 'X' : opts.input_is_protein = true; break; } } if (opts.spaced_seed_mask != DEFAULT_SPACED_SEED_MASK) ExpandSpacedSeedMask(opts.spaced_seed_mask, opts.input_is_protein ? BITS_PER_CHAR_PRO : BITS_PER_CHAR_DNA); if (opts.hashtable_filename.empty() || opts.ID_to_taxon_map_filename.empty() || opts.ncbi_taxonomy_directory.empty() || opts.options_filename.empty() || opts.taxonomy_filename.empty()) { cerr << "missing mandatory filename parameter" << endl; usage(); } if (opts.k == 0 || opts.l == 0 || opts.capacity == 0) { cerr << "missing mandatory integer parameter" << endl; usage(); } if (opts.k < opts.l) { cerr << "k cannot be less than l" << endl; usage(); } if (opts.block_size < opts.subblock_size) { cerr << "block size cannot be less than subblock size\n"; usage(); } if (opts.maximum_capacity > opts.capacity) { cerr << "maximum capacity option shouldn't specify larger capacity than normal" << endl; usage(); } } void usage(int exit_code) { cerr << "Usage: build_db \n" << "\n" << "Options (*mandatory):\n" << "* -H FILENAME Kraken 2 hash table filename\n" << "* -m FILENAME Sequence ID to taxon map filename\n" << "* -t FILENAME Kraken 2 taxonomy filename\n" << "* -n DIR NCBI taxonomy directory name\n" << "* -o FILENAME Kraken 2 options filename\n" << "* -k INT Set length of k-mers\n" << "* -l INT Set length of minimizers\n" << "* -c INT Set capacity of hash table\n" << " -M INT Set maximum capacity of hash table (MiniKraken)\n" << " -S BITSTRING Spaced seed mask\n" << " -T BITSTRING Minimizer toggle mask\n" << " -X Input seqs. are proteins\n" << " -p INT Number of threads\n" << " -F Use fast, nondeterministic building method\n" << " -B INT Read block size\n" << " -b INT Read subblock size\n" << " -r INT Bit storage requested for taxid" << endl; exit(exit_code); } kraken2-2.1.2/src/classify.cc000066400000000000000000000676751404614725400160200ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #include "kraken2_headers.h" #include "kv_store.h" #include "taxonomy.h" #include "seqreader.h" #include "mmscanner.h" #include "compact_hash.h" #include "kraken2_data.h" #include "aa_translate.h" #include "reports.h" #include "utilities.h" #include "readcounts.h" using namespace kraken2; using std::cout; using std::cerr; using std::endl; using std::ifstream; using std::map; using std::ostringstream; using std::ofstream; using std::set; using std::string; using std::vector; using namespace kraken2; static const size_t NUM_FRAGMENTS_PER_THREAD = 10000; static const taxid_t MATE_PAIR_BORDER_TAXON = TAXID_MAX; static const taxid_t READING_FRAME_BORDER_TAXON = TAXID_MAX - 1; static const taxid_t AMBIGUOUS_SPAN_TAXON = TAXID_MAX - 2; struct Options { string index_filename; string taxonomy_filename; string options_filename; string report_filename; string classified_output_filename; string unclassified_output_filename; string kraken_output_filename; bool mpa_style_report; bool report_kmer_data; bool quick_mode; bool report_zero_counts; bool use_translated_search; bool print_scientific_name; double confidence_threshold; int num_threads; bool paired_end_processing; bool single_file_pairs; int minimum_quality_score; int minimum_hit_groups; bool use_memory_mapping; bool match_input_order; }; struct ClassificationStats { uint64_t total_sequences; uint64_t total_bases; uint64_t total_classified; }; struct OutputStreamData { bool initialized; bool printing_sequences; std::ostream *classified_output1; std::ostream *classified_output2; std::ostream *unclassified_output1; std::ostream *unclassified_output2; std::ostream *kraken_output; }; struct OutputData { uint64_t block_id; string kraken_str; string classified_out1_str; string classified_out2_str; string unclassified_out1_str; string unclassified_out2_str; }; void ParseCommandLine(int argc, char **argv, Options &opts); void usage(int exit_code=EX_USAGE); void ProcessFiles(const char *filename1, const char *filename2, KeyValueStore *hash, Taxonomy &tax, IndexOptions &idx_opts, Options &opts, ClassificationStats &stats, OutputStreamData &outputs, taxon_counters_t &total_taxon_counters); taxid_t ClassifySequence(Sequence &dna, Sequence &dna2, ostringstream &koss, KeyValueStore *hash, Taxonomy &tax, IndexOptions &idx_opts, Options &opts, ClassificationStats &stats, MinimizerScanner &scanner, vector &taxa, taxon_counts_t &hit_counts, vector &tx_frames, taxon_counters_t &my_taxon_counts); void AddHitlistString(ostringstream &oss, vector &taxa, Taxonomy &taxonomy); taxid_t ResolveTree(taxon_counts_t &hit_counts, Taxonomy &tax, size_t total_minimizers, Options &opts); void ReportStats(struct timeval time1, struct timeval time2, ClassificationStats &stats); void InitializeOutputs(Options &opts, OutputStreamData &outputs, SequenceFormat format); void MaskLowQualityBases(Sequence &dna, int minimum_quality_score); int main(int argc, char **argv) { Options opts; opts.quick_mode = false; opts.confidence_threshold = 0; opts.paired_end_processing = false; opts.single_file_pairs = false; opts.num_threads = 1; opts.mpa_style_report = false; opts.report_kmer_data = false; opts.report_zero_counts = false; opts.use_translated_search = false; opts.print_scientific_name = false; opts.minimum_quality_score = 0; opts.minimum_hit_groups = 0; opts.use_memory_mapping = false; taxon_counters_t taxon_counters; // stats per taxon ParseCommandLine(argc, argv, opts); omp_set_num_threads(opts.num_threads); cerr << "Loading database information..."; IndexOptions idx_opts = {0}; ifstream idx_opt_fs(opts.options_filename); struct stat sb; if (stat(opts.options_filename.c_str(), &sb) < 0) errx(EX_OSERR, "unable to get filesize of %s", opts.options_filename.c_str()); auto opts_filesize = sb.st_size; idx_opt_fs.read((char *) &idx_opts, opts_filesize); opts.use_translated_search = ! idx_opts.dna_db; Taxonomy taxonomy(opts.taxonomy_filename, opts.use_memory_mapping); KeyValueStore *hash_ptr = new CompactHashTable(opts.index_filename, opts.use_memory_mapping); cerr << " done." << endl; ClassificationStats stats = {0, 0, 0}; OutputStreamData outputs = { false, false, nullptr, nullptr, nullptr, nullptr, &std::cout }; struct timeval tv1, tv2; gettimeofday(&tv1, nullptr); if (optind == argc) { if (opts.paired_end_processing && ! opts.single_file_pairs) errx(EX_USAGE, "paired end processing used with no files specified"); ProcessFiles(nullptr, nullptr, hash_ptr, taxonomy, idx_opts, opts, stats, outputs, taxon_counters); } else { for (int i = optind; i < argc; i++) { if (opts.paired_end_processing && ! opts.single_file_pairs) { if (i + 1 == argc) { errx(EX_USAGE, "paired end processing used with unpaired file"); } ProcessFiles(argv[i], argv[i+1], hash_ptr, taxonomy, idx_opts, opts, stats, outputs, taxon_counters); i += 1; } else { ProcessFiles(argv[i], nullptr, hash_ptr, taxonomy, idx_opts, opts, stats, outputs, taxon_counters); } } } gettimeofday(&tv2, nullptr); delete hash_ptr; ReportStats(tv1, tv2, stats); if (! opts.report_filename.empty()) { if (opts.mpa_style_report) ReportMpaStyle(opts.report_filename, opts.report_zero_counts, taxonomy, taxon_counters); else { auto total_unclassified = stats.total_sequences - stats.total_classified; ReportKrakenStyle(opts.report_filename, opts.report_zero_counts, opts.report_kmer_data, taxonomy, taxon_counters, stats.total_sequences, total_unclassified); } } return 0; } void ReportStats(struct timeval time1, struct timeval time2, ClassificationStats &stats) { time2.tv_usec -= time1.tv_usec; time2.tv_sec -= time1.tv_sec; if (time2.tv_usec < 0) { time2.tv_sec--; time2.tv_usec += 1000000; } double seconds = time2.tv_usec; seconds /= 1e6; seconds += time2.tv_sec; uint64_t total_unclassified = stats.total_sequences - stats.total_classified; if (isatty(fileno(stderr))) cerr << "\r"; fprintf(stderr, "%llu sequences (%.2f Mbp) processed in %.3fs (%.1f Kseq/m, %.2f Mbp/m).\n", (unsigned long long) stats.total_sequences, stats.total_bases / 1.0e6, seconds, stats.total_sequences / 1.0e3 / (seconds / 60), stats.total_bases / 1.0e6 / (seconds / 60) ); fprintf(stderr, " %llu sequences classified (%.2f%%)\n", (unsigned long long) stats.total_classified, stats.total_classified * 100.0 / stats.total_sequences); fprintf(stderr, " %llu sequences unclassified (%.2f%%)\n", (unsigned long long) total_unclassified, total_unclassified * 100.0 / stats.total_sequences); } void ProcessFiles(const char *filename1, const char *filename2, KeyValueStore *hash, Taxonomy &tax, IndexOptions &idx_opts, Options &opts, ClassificationStats &stats, OutputStreamData &outputs, taxon_counters_t &total_taxon_counters) { std::istream *fptr1 = nullptr, *fptr2 = nullptr; if (filename1 == nullptr) fptr1 = &std::cin; else { fptr1 = new std::ifstream(filename1); } if (opts.paired_end_processing && ! opts.single_file_pairs) { fptr2 = new std::ifstream(filename2); } // The priority queue for output is designed to ensure fragment data // is output in the same order it was input auto comparator = [](const OutputData &a, const OutputData &b) { return a.block_id > b.block_id; }; std::priority_queue, decltype(comparator)> output_queue(comparator); uint64_t next_input_block_id = 0; uint64_t next_output_block_id = 0; omp_lock_t output_lock; omp_init_lock(&output_lock); #pragma omp parallel { MinimizerScanner scanner(idx_opts.k, idx_opts.l, idx_opts.spaced_seed_mask, idx_opts.dna_db, idx_opts.toggle_mask, idx_opts.revcom_version); vector taxa; taxon_counts_t hit_counts; ostringstream kraken_oss, c1_oss, c2_oss, u1_oss, u2_oss; ClassificationStats thread_stats = {0, 0, 0}; vector translated_frames(6); BatchSequenceReader reader1, reader2; Sequence seq1, seq2; uint64_t block_id; OutputData out_data; taxon_counters_t thread_taxon_counters; while (true) { thread_stats.total_sequences = 0; thread_stats.total_bases = 0; thread_stats.total_classified = 0; auto ok_read = false; #pragma omp critical(seqread) { // Input processing block if (! opts.paired_end_processing) { // Unpaired data? Just read in a sized block ok_read = reader1.LoadBlock(*fptr1, (size_t)(3 * 1024 * 1024)); } else if (! opts.single_file_pairs) { // Paired data in 2 files? Read a line-counted batch from each file. ok_read = reader1.LoadBatch(*fptr1, NUM_FRAGMENTS_PER_THREAD); if (ok_read && opts.paired_end_processing) ok_read = reader2.LoadBatch(*fptr2, NUM_FRAGMENTS_PER_THREAD); } else { auto frags = NUM_FRAGMENTS_PER_THREAD * 2; // Ensure frag count is even - just in case above line is changed if (frags % 2 == 1) frags++; ok_read = reader1.LoadBatch(*fptr1, frags); } block_id = next_input_block_id++; } if (! ok_read) break; // Reset all dynamically-growing things kraken_oss.str(""); c1_oss.str(""); c2_oss.str(""); u1_oss.str(""); u2_oss.str(""); thread_taxon_counters.clear(); while (true) { auto valid_fragment = reader1.NextSequence(seq1); if (opts.paired_end_processing && valid_fragment) { if (opts.single_file_pairs) valid_fragment = reader1.NextSequence(seq2); else valid_fragment = reader2.NextSequence(seq2); } if (! valid_fragment) break; thread_stats.total_sequences++; if (opts.minimum_quality_score > 0) { MaskLowQualityBases(seq1, opts.minimum_quality_score); if (opts.paired_end_processing) MaskLowQualityBases(seq2, opts.minimum_quality_score); } auto call = ClassifySequence(seq1, seq2, kraken_oss, hash, tax, idx_opts, opts, thread_stats, scanner, taxa, hit_counts, translated_frames, thread_taxon_counters); if (call) { char buffer[1024] = ""; sprintf(buffer, " kraken:taxid|%llu", (unsigned long long) tax.nodes()[call].external_id); seq1.header += buffer; c1_oss << seq1.to_string(); if (opts.paired_end_processing) { seq2.header += buffer; c2_oss << seq2.to_string(); } } else { u1_oss << seq1.to_string(); if (opts.paired_end_processing) u2_oss << seq2.to_string(); } thread_stats.total_bases += seq1.seq.size(); if (opts.paired_end_processing) thread_stats.total_bases += seq2.seq.size(); } #pragma omp atomic stats.total_sequences += thread_stats.total_sequences; #pragma omp atomic stats.total_bases += thread_stats.total_bases; #pragma omp atomic stats.total_classified += thread_stats.total_classified; #pragma omp critical(output_stats) { if (isatty(fileno(stderr))) cerr << "\rProcessed " << stats.total_sequences << " sequences (" << stats.total_bases << " bp) ..."; } if (! outputs.initialized) { InitializeOutputs(opts, outputs, reader1.file_format()); } out_data.block_id = block_id; out_data.kraken_str.assign(kraken_oss.str()); out_data.classified_out1_str.assign(c1_oss.str()); out_data.classified_out2_str.assign(c2_oss.str()); out_data.unclassified_out1_str.assign(u1_oss.str()); out_data.unclassified_out2_str.assign(u2_oss.str()); #pragma omp critical(output_queue) { output_queue.push(out_data); } #pragma omp critical(update_taxon_counters) { for (auto &kv_pair : thread_taxon_counters) { total_taxon_counters[kv_pair.first] += std::move(kv_pair.second); } } bool output_loop = true; while (output_loop) { #pragma omp critical(output_queue) { output_loop = ! output_queue.empty(); if (output_loop) { out_data = output_queue.top(); if (out_data.block_id == next_output_block_id) { output_queue.pop(); // Acquiring output lock obligates thread to print out // next output data block, contained in out_data omp_set_lock(&output_lock); next_output_block_id++; } else output_loop = false; } } if (! output_loop) break; if (outputs.kraken_output != nullptr) (*outputs.kraken_output) << out_data.kraken_str; if (outputs.classified_output1 != nullptr) (*outputs.classified_output1) << out_data.classified_out1_str; if (outputs.classified_output2 != nullptr) (*outputs.classified_output2) << out_data.classified_out2_str; if (outputs.unclassified_output1 != nullptr) (*outputs.unclassified_output1) << out_data.unclassified_out1_str; if (outputs.unclassified_output2 != nullptr) (*outputs.unclassified_output2) << out_data.unclassified_out2_str; omp_unset_lock(&output_lock); } // end while output loop } // end while } // end parallel block omp_destroy_lock(&output_lock); if (fptr1 != nullptr) delete fptr1; if (fptr2 != nullptr) delete fptr2; if (outputs.kraken_output != nullptr) (*outputs.kraken_output) << std::flush; if (outputs.classified_output1 != nullptr) (*outputs.classified_output1) << std::flush; if (outputs.classified_output2 != nullptr) (*outputs.classified_output2) << std::flush; if (outputs.unclassified_output1 != nullptr) (*outputs.unclassified_output1) << std::flush; if (outputs.unclassified_output2 != nullptr) (*outputs.unclassified_output2) << std::flush; } taxid_t ResolveTree(taxon_counts_t &hit_counts, Taxonomy &taxonomy, size_t total_minimizers, Options &opts) { taxid_t max_taxon = 0; uint32_t max_score = 0; uint32_t required_score = ceil(opts.confidence_threshold * total_minimizers); // Sum each taxon's LTR path, find taxon with highest LTR score for (auto &kv_pair : hit_counts) { taxid_t taxon = kv_pair.first; uint32_t score = 0; for (auto &kv_pair2 : hit_counts) { taxid_t taxon2 = kv_pair2.first; if (taxonomy.IsAAncestorOfB(taxon2, taxon)) { score += kv_pair2.second; } } if (score > max_score) { max_score = score; max_taxon = taxon; } else if (score == max_score) { max_taxon = taxonomy.LowestCommonAncestor(max_taxon, taxon); } } // Reset max. score to be only hits at the called taxon max_score = hit_counts[max_taxon]; // We probably have a call w/o required support (unless LCA resolved tie) while (max_taxon && max_score < required_score) { max_score = 0; for (auto &kv_pair : hit_counts) { taxid_t taxon = kv_pair.first; // Add to score if taxon in max_taxon's clade if (taxonomy.IsAAncestorOfB(max_taxon, taxon)) { max_score += kv_pair.second; } } // Score is now sum of hits at max_taxon and w/in max_taxon clade if (max_score >= required_score) // Kill loop and return, we've got enough support here return max_taxon; else // Run up tree until confidence threshold is met // Run off tree if required score isn't met max_taxon = taxonomy.nodes()[max_taxon].parent_id; } return max_taxon; } std::string TrimPairInfo(std::string &id) { size_t sz = id.size(); if (sz <= 2) return id; if ( id[sz - 2] == '/' && (id[sz - 1] == '1' || id[sz - 1] == '2') ) return id.substr(0, sz - 2); return id; } taxid_t ClassifySequence(Sequence &dna, Sequence &dna2, ostringstream &koss, KeyValueStore *hash, Taxonomy &taxonomy, IndexOptions &idx_opts, Options &opts, ClassificationStats &stats, MinimizerScanner &scanner, vector &taxa, taxon_counts_t &hit_counts, vector &tx_frames, taxon_counters_t &curr_taxon_counts) { uint64_t *minimizer_ptr; taxid_t call = 0; taxa.clear(); hit_counts.clear(); auto frame_ct = opts.use_translated_search ? 6 : 1; int64_t minimizer_hit_groups = 0; for (int mate_num = 0; mate_num < 2; mate_num++) { if (mate_num == 1 && ! opts.paired_end_processing) break; if (opts.use_translated_search) { TranslateToAllFrames(mate_num == 0 ? dna.seq : dna2.seq, tx_frames); } // index of frame is 0 - 5 w/ tx search (or 0 if no tx search) for (int frame_idx = 0; frame_idx < frame_ct; frame_idx++) { if (opts.use_translated_search) { scanner.LoadSequence(tx_frames[frame_idx]); } else { scanner.LoadSequence(mate_num == 0 ? dna.seq : dna2.seq); } uint64_t last_minimizer = UINT64_MAX; taxid_t last_taxon = TAXID_MAX; while ((minimizer_ptr = scanner.NextMinimizer()) != nullptr) { taxid_t taxon; if (scanner.is_ambiguous()) { taxon = AMBIGUOUS_SPAN_TAXON; } else { if (*minimizer_ptr != last_minimizer) { bool skip_lookup = false; if (idx_opts.minimum_acceptable_hash_value) { if (MurmurHash3(*minimizer_ptr) < idx_opts.minimum_acceptable_hash_value) skip_lookup = true; } taxon = 0; if (! skip_lookup) taxon = hash->Get(*minimizer_ptr); last_taxon = taxon; last_minimizer = *minimizer_ptr; // Increment this only if (a) we have DB hit and // (b) minimizer != last minimizer if (taxon) { minimizer_hit_groups++; // New minimizer should trigger registering minimizer in RC/HLL curr_taxon_counts[taxon].add_kmer(scanner.last_minimizer()); } } else { taxon = last_taxon; } if (taxon) { if (opts.quick_mode && minimizer_hit_groups >= opts.minimum_hit_groups) { call = taxon; goto finished_searching; // need to break 3 loops here } hit_counts[taxon]++; } } taxa.push_back(taxon); } if (opts.use_translated_search && frame_idx != 5) taxa.push_back(READING_FRAME_BORDER_TAXON); } if (opts.paired_end_processing && mate_num == 0) taxa.push_back(MATE_PAIR_BORDER_TAXON); } finished_searching: auto total_kmers = taxa.size(); if (opts.paired_end_processing) total_kmers--; // account for the mate pair marker if (opts.use_translated_search) // account for reading frame markers total_kmers -= opts.paired_end_processing ? 4 : 2; call = ResolveTree(hit_counts, taxonomy, total_kmers, opts); // Void a call made by too few minimizer groups if (call && minimizer_hit_groups < opts.minimum_hit_groups) call = 0; if (call) { stats.total_classified++; curr_taxon_counts[call].incrementReadCount(); } if (call) koss << "C\t"; else koss << "U\t"; if (! opts.paired_end_processing) koss << dna.id << "\t"; else koss << TrimPairInfo(dna.id) << "\t"; auto ext_call = taxonomy.nodes()[call].external_id; if (opts.print_scientific_name) { const char *name = nullptr; if (call) { name = taxonomy.name_data() + taxonomy.nodes()[call].name_offset; } koss << (name ? name : "unclassified") << " (taxid " << ext_call << ")"; } else { koss << ext_call; } koss << "\t"; if (! opts.paired_end_processing) koss << dna.seq.size() << "\t"; else koss << dna.seq.size() << "|" << dna2.seq.size() << "\t"; if (opts.quick_mode) { koss << ext_call << ":Q"; } else { if (taxa.empty()) koss << "0:0"; else AddHitlistString(koss, taxa, taxonomy); } koss << endl; return call; } void AddHitlistString(ostringstream &oss, vector &taxa, Taxonomy &taxonomy) { auto last_code = taxa[0]; auto code_count = 1; for (size_t i = 1; i < taxa.size(); i++) { auto code = taxa[i]; if (code == last_code) { code_count += 1; } else { if (last_code != MATE_PAIR_BORDER_TAXON && last_code != READING_FRAME_BORDER_TAXON) { if (last_code == AMBIGUOUS_SPAN_TAXON) { oss << "A:" << code_count << " "; } else { auto ext_code = taxonomy.nodes()[last_code].external_id; oss << ext_code << ":" << code_count << " "; } } else { // mate pair/reading frame marker oss << (last_code == MATE_PAIR_BORDER_TAXON ? "|:| " : "-:- "); } code_count = 1; last_code = code; } } if (last_code != MATE_PAIR_BORDER_TAXON && last_code != READING_FRAME_BORDER_TAXON) { if (last_code == AMBIGUOUS_SPAN_TAXON) { oss << "A:" << code_count << " "; } else { auto ext_code = taxonomy.nodes()[last_code].external_id; oss << ext_code << ":" << code_count; } } else { // mate pair/reading frame marker oss << (last_code == MATE_PAIR_BORDER_TAXON ? "|:|" : "-:-"); } } void InitializeOutputs(Options &opts, OutputStreamData &outputs, SequenceFormat format) { #pragma omp critical(output_init) { if (! outputs.initialized) { if (! opts.classified_output_filename.empty()) { if (opts.paired_end_processing) { vector fields = SplitString(opts.classified_output_filename, "#", 3); if (fields.size() < 2) { errx(EX_DATAERR, "Paired filename format missing # character: %s", opts.classified_output_filename.c_str()); } else if (fields.size() > 2) { errx(EX_DATAERR, "Paired filename format has >1 # character: %s", opts.classified_output_filename.c_str()); } outputs.classified_output1 = new ofstream(fields[0] + "_1" + fields[1]); outputs.classified_output2 = new ofstream(fields[0] + "_2" + fields[1]); } else outputs.classified_output1 = new ofstream(opts.classified_output_filename); outputs.printing_sequences = true; } if (! opts.unclassified_output_filename.empty()) { if (opts.paired_end_processing) { vector fields = SplitString(opts.unclassified_output_filename, "#", 3); if (fields.size() < 2) { errx(EX_DATAERR, "Paired filename format missing # character: %s", opts.unclassified_output_filename.c_str()); } else if (fields.size() > 2) { errx(EX_DATAERR, "Paired filename format has >1 # character: %s", opts.unclassified_output_filename.c_str()); } outputs.unclassified_output1 = new ofstream(fields[0] + "_1" + fields[1]); outputs.unclassified_output2 = new ofstream(fields[0] + "_2" + fields[1]); } else outputs.unclassified_output1 = new ofstream(opts.unclassified_output_filename); outputs.printing_sequences = true; } if (! opts.kraken_output_filename.empty()) { if (opts.kraken_output_filename == "-") // Special filename to silence Kraken output outputs.kraken_output = nullptr; else outputs.kraken_output = new ofstream(opts.kraken_output_filename); } outputs.initialized = true; } } } void MaskLowQualityBases(Sequence &dna, int minimum_quality_score) { if (dna.format != FORMAT_FASTQ) return; if (dna.seq.size() != dna.quals.size()) errx(EX_DATAERR, "%s: Sequence length (%d) != Quality string length (%d)", dna.id.c_str(), (int) dna.seq.size(), (int) dna.quals.size()); for (size_t i = 0; i < dna.seq.size(); i++) { if ((dna.quals[i] - '!') < minimum_quality_score) dna.seq[i] = 'x'; } } void ParseCommandLine(int argc, char **argv, Options &opts) { int opt; while ((opt = getopt(argc, argv, "h?H:t:o:T:p:R:C:U:O:Q:g:nmzqPSMK")) != -1) { switch (opt) { case 'h' : case '?' : usage(0); break; case 'H' : opts.index_filename = optarg; break; case 't' : opts.taxonomy_filename = optarg; break; case 'T' : opts.confidence_threshold = std::stod(optarg); if (opts.confidence_threshold < 0 || opts.confidence_threshold > 1) { errx(EX_USAGE, "confidence threshold must be in [0, 1]"); } break; case 'o' : opts.options_filename = optarg; break; case 'q' : opts.quick_mode = true; break; case 'p' : opts.num_threads = atoi(optarg); if (opts.num_threads < 1) errx(EX_USAGE, "number of threads can't be less than 1"); break; case 'g' : opts.minimum_hit_groups = atoi(optarg); break; case 'P' : opts.paired_end_processing = true; break; case 'S' : opts.paired_end_processing = true; opts.single_file_pairs = true; break; case 'm' : opts.mpa_style_report = true; break; case 'K': opts.report_kmer_data = true; break; case 'R' : opts.report_filename = optarg; break; case 'z' : opts.report_zero_counts = true; break; case 'C' : opts.classified_output_filename = optarg; break; case 'U' : opts.unclassified_output_filename = optarg; break; case 'O' : opts.kraken_output_filename = optarg; break; case 'n' : opts.print_scientific_name = true; break; case 'Q' : opts.minimum_quality_score = atoi(optarg); break; case 'M' : opts.use_memory_mapping = true; break; } } if (opts.index_filename.empty() || opts.taxonomy_filename.empty() || opts.options_filename.empty()) { warnx("mandatory filename missing"); usage(); } if (opts.mpa_style_report && opts.report_filename.empty()) { warnx("-m requires -R be used"); usage(); } } void usage(int exit_code) { cerr << "Usage: classify [options] " << endl << endl << "Options: (*mandatory)" << endl << "* -H filename Kraken 2 index filename" << endl << "* -t filename Kraken 2 taxonomy filename" << endl << "* -o filename Kraken 2 options filename" << endl << " -q Quick mode" << endl << " -M Use memory mapping to access hash & taxonomy" << endl << " -T NUM Confidence score threshold (def. 0)" << endl << " -p NUM Number of threads (def. 1)" << endl << " -Q NUM Minimum quality score (FASTQ only, def. 0)" << endl << " -P Process pairs of reads" << endl << " -S Process pairs with mates in same file" << endl << " -R filename Print report to filename" << endl << " -m In comb. w/ -R, use mpa-style report" << endl << " -z In comb. w/ -R, report taxa w/ 0 count" << endl << " -n Print scientific name instead of taxid in Kraken output" << endl << " -g NUM Minimum number of hit groups needed for call" << endl << " -C filename Filename/format to have classified sequences" << endl << " -U filename Filename/format to have unclassified sequences" << endl << " -O filename Output file for normal Kraken output" << endl << " -K In comb. w/ -R, provide minimizer information in report" << endl; exit(exit_code); } kraken2-2.1.2/src/compact_hash.cc000066400000000000000000000201501404614725400166050ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #include "compact_hash.h" using std::string; using std::ofstream; using std::unordered_map; namespace kraken2 { CompactHashTable::CompactHashTable(size_t capacity, size_t key_bits, size_t value_bits) : capacity_(capacity), size_(0), key_bits_(key_bits), value_bits_(value_bits), file_backed_(false), locks_initialized_(true) { if (key_bits + value_bits != sizeof(*table_) * 8) errx(EX_SOFTWARE, "sum of key bits and value bits must equal %u", (unsigned int) (sizeof(*table_) * 8)); if (key_bits == 0) errx(EX_SOFTWARE, "key bits cannot be zero"); if (value_bits == 0) errx(EX_SOFTWARE, "value bits cannot be zero"); for (size_t i = 0; i < LOCK_ZONES; i++) omp_init_lock(&zone_locks_[i]); try { table_ = new CompactHashCell[capacity_]; } catch (std::bad_alloc ex) { std::cerr << "Failed attempt to allocate " << (sizeof(*table_) * capacity_) << "bytes;\n" << "you may not have enough free memory to build this database.\n" << "Perhaps increasing the k-mer length, or reducing memory usage from\n" << "other programs could help you build this database?" << std::endl; errx(EX_OSERR, "unable to allocate hash table memory"); } memset(table_, 0, capacity_ * sizeof(*table_)); } CompactHashTable::CompactHashTable(const string &filename, bool memory_mapping) { LoadTable(filename.c_str(), memory_mapping); } CompactHashTable::CompactHashTable(const char *filename, bool memory_mapping) { LoadTable(filename, memory_mapping); } CompactHashTable::~CompactHashTable() { if (! file_backed_) delete[] table_; if (locks_initialized_) for (size_t i = 0; i < LOCK_ZONES; i++) omp_destroy_lock(&zone_locks_[i]); } void CompactHashTable::LoadTable(const char *filename, bool memory_mapping) { locks_initialized_ = false; if (memory_mapping) { backing_file_.OpenFile(filename); char *ptr = backing_file_.fptr(); memcpy((char *) &capacity_, ptr, sizeof(capacity_)); ptr += sizeof(capacity_); memcpy((char *) &size_, ptr, sizeof(size_)); ptr += sizeof(size_); memcpy((char *) &key_bits_, ptr, sizeof(key_bits_)); ptr += sizeof(key_bits_); memcpy((char *) &value_bits_, ptr, sizeof(value_bits_)); ptr += sizeof(value_bits_); table_ = (CompactHashCell *) ptr; if (backing_file_.filesize() - (ptr - backing_file_.fptr()) != sizeof(*table_) * capacity_) { errx(EX_DATAERR, "Capacity mismatch in %s, aborting", filename); } file_backed_ = true; } else { std::ifstream ifs(filename); ifs.read((char *) &capacity_, sizeof(capacity_)); ifs.read((char *) &size_, sizeof(size_)); ifs.read((char *) &key_bits_, sizeof(key_bits_)); ifs.read((char *) &value_bits_, sizeof(value_bits_)); try { table_ = new CompactHashCell[capacity_]; } catch (std::bad_alloc &ex) { std::cerr << "Failed attempt to allocate " << (sizeof(*table_) * capacity_) << "bytes;\n" << "you may not have enough free memory to load this database.\n" << "If your computer has enough RAM, perhaps reducing memory usage from\n" << "other programs could help you load this database?" << std::endl; errx(EX_OSERR, "unable to allocate hash table memory"); } ifs.read((char *) table_, capacity_ * sizeof(*table_)); if (! ifs) errx(EX_OSERR, "Error reading in hash table"); file_backed_ = false; } } void CompactHashTable::WriteTable(const char *filename) { ofstream ofs(filename, ofstream::binary); ofs.write((char *) &capacity_, sizeof(capacity_)); ofs.write((char *) &size_, sizeof(size_)); ofs.write((char *) &key_bits_, sizeof(key_bits_)); ofs.write((char *) &value_bits_, sizeof(value_bits_)); ofs.write((char *) table_, sizeof(*table_) * capacity_); ofs.close(); } hvalue_t CompactHashTable::Get(hkey_t key) const { uint64_t hc = MurmurHash3(key); uint64_t compacted_key = hc >> (32 + value_bits_); size_t idx = hc % capacity_; size_t first_idx = idx; size_t step = 0; while (true) { if (! table_[idx].value(value_bits_)) // value of 0 means data is 0, saves work break; // search over, empty cell encountered in probe if (table_[idx].hashed_key(value_bits_) == compacted_key) return table_[idx].value(value_bits_); if (step == 0) step = second_hash(hc); idx += step; idx %= capacity_; if (idx == first_idx) break; // search over, we've exhausted the table } return 0; } bool CompactHashTable::FindIndex(hkey_t key, size_t *idx) const { uint64_t hc = MurmurHash3(key); uint64_t compacted_key = hc >> (32 + value_bits_); *idx = hc % capacity_; size_t first_idx = *idx; size_t step = 0; while (true) { if (! table_[*idx].value(value_bits_)) // value of 0 means data is 0, saves work return false; // search over, empty cell encountered in probe if (table_[*idx].hashed_key(value_bits_) == compacted_key) return true; if (step == 0) step = second_hash(hc); *idx += step; *idx %= capacity_; if (*idx == first_idx) break; // search over, we've exhausted the table } return false; } bool CompactHashTable::CompareAndSet (hkey_t key, hvalue_t new_value, hvalue_t *old_value) { if (file_backed_) return false; if (new_value == 0) return false; uint64_t hc = MurmurHash3(key); hkey_t compacted_key = hc >> (32 + value_bits_); size_t idx, first_idx; bool set_successful = false; bool search_successful = false; idx = first_idx = hc % capacity_; size_t step = 0; while (! search_successful) { size_t zone = idx % LOCK_ZONES; omp_set_lock(&zone_locks_[zone]); if (! table_[idx].value(value_bits_) || table_[idx].hashed_key(value_bits_) == compacted_key) { search_successful = true; #pragma omp flush if (*old_value == table_[idx].value(value_bits_)) { table_[idx].populate(compacted_key, new_value, key_bits_, value_bits_); if (! *old_value) { #pragma omp atomic size_++; } set_successful = true; } else { *old_value = table_[idx].value(value_bits_); } } omp_unset_lock(&zone_locks_[zone]); if (step == 0) step = second_hash(hc); idx += step; idx %= capacity_; if (idx == first_idx) errx(EX_SOFTWARE, "compact hash table capacity exceeded"); } return set_successful; } bool CompactHashTable::DirectCompareAndSet (size_t idx, hkey_t key, hvalue_t new_value, hvalue_t *old_value) { uint64_t hc = MurmurHash3(key); hkey_t compacted_key = hc >> (32 + value_bits_); bool set_successful = false; size_t zone = idx % LOCK_ZONES; omp_set_lock(&zone_locks_[zone]); #pragma omp flush if (*old_value == table_[idx].value(value_bits_)) { table_[idx].populate(compacted_key, new_value, key_bits_, value_bits_); if (! *old_value) { #pragma omp atomic size_++; } set_successful = true; } else { *old_value = table_[idx].value(value_bits_); } omp_unset_lock(&zone_locks_[zone]); return set_successful; } // Linear probing may be ok for accuracy, as long as occupancy is < 95% // Linear probing leads to more clustering, longer probing paths, and // higher probability of a false answer // Double hashing can have shorter probing paths, but less cache efficiency inline uint64_t CompactHashTable::second_hash(uint64_t first_hash) const { #ifdef LINEAR_PROBING return 1; #else // Double hashing return (first_hash >> 8) | 1; #endif } taxon_counts_t CompactHashTable::GetValueCounts() const { taxon_counts_t value_counts; int thread_ct = omp_get_max_threads(); taxon_counts_t thread_value_counts[thread_ct]; #pragma omp parallel for for (size_t i = 0; i < capacity_; i++) { auto val = table_[i].value(value_bits_); if (val) thread_value_counts[omp_get_thread_num()][val]++; } for (auto i = 0; i < thread_ct; i++) { for (auto &kv_pair : thread_value_counts[i]) value_counts[kv_pair.first] += kv_pair.second; } return value_counts; } } // end namespace kraken2-2.1.2/src/compact_hash.h000066400000000000000000000065541404614725400164630ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #ifndef KRAKEN2_COMPACT_HASH_H_ #define KRAKEN2_COMPACT_HASH_H_ #include "kv_store.h" #include "mmap_file.h" #include "kraken2_headers.h" #include "kraken2_data.h" namespace kraken2 { struct CompactHashCell { inline hkey_t hashed_key(size_t value_bits) { return (hkey_t) (data >> value_bits); } inline hvalue_t value(size_t value_bits) { return (hvalue_t) (data & ((1 << value_bits) - 1)); } void populate(hkey_t compacted_key, hvalue_t val, size_t key_bits, size_t value_bits) { if (key_bits + value_bits != 32) errx(EX_SOFTWARE, "key len of %u and value len of %u don't sum to 32", (unsigned int) key_bits, (unsigned int) value_bits); if (! key_bits || ! value_bits) errx(EX_SOFTWARE, "key len and value len must be nonzero"); uint64_t max_value = (1llu << value_bits) - 1; if (max_value < val) errx(EX_SOFTWARE, "value len of %u too small for value of %llu", (unsigned int) value_bits, (unsigned long long int) val); data = compacted_key << value_bits; data |= val; } // value in the low bits // hash of the key in the high bits uint32_t data; }; /** Implementation of a compact, fixed-size probabilistic hash table. Operations supported: - search - compare-and-set Does NOT support: - values of 0 Keys must be 64-bit unsigned integers, < 2 ** 63. Values must be 32-bit unsigned integers, > 0. Cells are 32-bit unsigned integers, with truncated hashed keys stored in the most significant bits, and truncated values stored in the least significant bits. Compaction level is set when the table is created by the WriteCompactedTable() method of the HashTable class. **/ class CompactHashTable : public KeyValueStore { public: CompactHashTable(size_t capacity, size_t key_bits, size_t value_bits); CompactHashTable(const std::string &filename, bool memory_mapping=false); CompactHashTable(const char *filename, bool memory_mapping=false); ~CompactHashTable(); hvalue_t Get(hkey_t key) const; bool FindIndex(hkey_t key, size_t *idx) const; // How CompareAndSet works: // if *old_value == CHT[key] // CHT[key] = new_value // return true // else // *old_value = CHT[key] // return false bool CompareAndSet(hkey_t key, hvalue_t new_value, hvalue_t *old_value); bool DirectCompareAndSet(size_t idx, hkey_t key, hvalue_t new_value, hvalue_t *old_value); void WriteTable(const char *filename); taxon_counts_t GetValueCounts() const; size_t capacity() const { return capacity_; } size_t size() const { return size_; } size_t key_bits() const { return key_bits_; } size_t value_bits() const { return value_bits_; } double occupancy() const { return size_ * 1.0 / capacity_; } private: static const size_t LOCK_ZONES = 256; size_t capacity_; size_t size_; //size_t index_mask_; size_t key_bits_; size_t value_bits_; CompactHashCell *table_; bool file_backed_; bool locks_initialized_; MMapFile backing_file_; omp_lock_t zone_locks_[LOCK_ZONES]; CompactHashTable(const CompactHashTable &rhs); CompactHashTable& operator=(const CompactHashTable &rhs); void LoadTable(const char *filename, bool memory_mapping); uint64_t second_hash(uint64_t first_hash) const; }; } // end namespace #endif kraken2-2.1.2/src/dump_table.cc000066400000000000000000000105411404614725400162730ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #include "kraken2_headers.h" #include "compact_hash.h" #include "taxonomy.h" #include "mmscanner.h" #include "kraken2_data.h" #include "reports.h" using std::string; using std::unordered_map; using std::cout; using std::cerr; using std::endl; using namespace kraken2; struct Options { string hashtable_filename; string taxonomy_filename; string options_filename; string output_filename; bool use_mpa_style; bool report_zeros; bool skip_counts; unsigned int num_threads; }; void ParseCommandLine(int argc, char **argv, Options &opts); void usage(int exit_code = EX_USAGE); std::string mask2str(uint64_t mask, int digits) { std::string str; for (int i = digits - 1; i >= 0; i--) str.push_back((mask >> i) & 1 ? '1' : '0'); return str; } int main(int argc, char **argv) { Options opts; opts.report_zeros = false; opts.output_filename = "/dev/fd/1"; opts.use_mpa_style = false; opts.skip_counts = false; opts.num_threads = 1; ParseCommandLine(argc, argv, opts); omp_set_num_threads(opts.num_threads); CompactHashTable kraken_index(opts.hashtable_filename); Taxonomy taxonomy(opts.taxonomy_filename); IndexOptions idx_opts = {0}; std::ifstream idx_opt_fs(opts.options_filename); idx_opt_fs.read((char *) &idx_opts, sizeof(idx_opts)); std::cout << "# Database options: " << (idx_opts.dna_db ? "nucleotide" : "protein") << " db, " << "k = " << idx_opts.k << ", " << "l = " << idx_opts.l << "\n" << "# Spaced mask = " << mask2str(idx_opts.spaced_seed_mask, idx_opts.l * (idx_opts.dna_db ? BITS_PER_CHAR_DNA : BITS_PER_CHAR_PRO)) << "\n" << "# Toggle mask = " << mask2str(idx_opts.toggle_mask, 64) << "\n" << "# Total taxonomy nodes: " << taxonomy.node_count() << "\n" << "# Table size: " << kraken_index.size() << "\n" << "# Table capacity: " << kraken_index.capacity() << "\n" << "# Min clear hash value = " << idx_opts.minimum_acceptable_hash_value << "\n"; if (idx_opts.revcom_version != CURRENT_REVCOM_VERSION) std::cout << "# Built with outdated revcom version\n"; std::cout << std::flush; if (! opts.skip_counts) { taxon_counts_t taxid_counts = kraken_index.GetValueCounts(); uint64_t total_seqs = 0; taxon_counters_t taxid_counters; for (auto &kv_pair : taxid_counts) { total_seqs += kv_pair.second; READCOUNTER rc(kv_pair.second, 0); taxid_counters[kv_pair.first] = rc; } if (opts.use_mpa_style) ReportMpaStyle(opts.output_filename, opts.report_zeros, taxonomy, taxid_counters); else ReportKrakenStyle(opts.output_filename, opts.report_zeros, false, taxonomy, taxid_counters, total_seqs, 0); } return 0; } void ParseCommandLine(int argc, char **argv, Options &opts) { int opt; while ((opt = getopt(argc, argv, "?hH:t:o:O:p:zms")) != -1) { switch (opt) { case 'h' : case '?' : usage(0); break; case 'H' : opts.hashtable_filename = optarg; break; case 't' : opts.taxonomy_filename = optarg; break; case 'o' : opts.options_filename = optarg; break; case 'z' : opts.report_zeros = true; break; case 'O' : opts.output_filename = optarg; break; case 'm' : opts.use_mpa_style = true; break; case 's' : opts.skip_counts = true; break; case 'p' : opts.num_threads = atoi(optarg); break; } } if (opts.hashtable_filename.empty() || opts.taxonomy_filename.empty() || opts.options_filename.empty()) { cerr << "missing mandatory filename parameter" << endl; usage(); } } void usage(int exit_code) { cerr << "Usage: dump_table \n" << "\n" << "Options (*mandatory):\n" << "* -H FILENAME Kraken 2 hash table filename\n" << "* -t FILENAME Kraken 2 taxonomy filename\n" << "* -o FILENAME Kraken 2 database options filename\n" << " -O FILENAME Output filename (def: /dev/fd/1)\n" << " -m Use MPA style output instead of Kraken 2 output\n" << " -s Skip reporting minimizer counts, just show DB stats\n" << " -z Report taxa with zero counts\n"; exit(exit_code); } kraken2-2.1.2/src/estimate_capacity.cc000066400000000000000000000123301404614725400176450ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #include "kraken2_headers.h" #include "kv_store.h" #include "mmscanner.h" #include "seqreader.h" #include "utilities.h" using std::string; using std::cout; using std::queue; using std::cerr; using std::endl; using std::ifstream; using std::ofstream; using std::unordered_set; using std::vector; using namespace kraken2; #define RANGE_SECTIONS 1024 // must be power of 2 #define RANGE_MASK (RANGE_SECTIONS - 1) #define MAX_N RANGE_SECTIONS #define DEFAULT_N 4 #define DEFAULT_BLOCK_SIZE (30 * 1024 * 1024) // yes, 30 MB struct Options { size_t k, l, n; bool input_is_protein; int threads; size_t block_size; uint64_t spaced_seed_mask; uint64_t toggle_mask; }; void ParseCommandLine(int argc, char **argv, Options &opts); void usage(int exit_code = EX_USAGE); void ProcessSequence(string &seq, Options &opts, vector> &sets); void ProcessSequences(Options &opts); int main(int argc, char **argv) { Options opts; opts.k = 0; opts.l = 0; opts.n = DEFAULT_N; opts.threads = 1; opts.input_is_protein = false; opts.spaced_seed_mask = DEFAULT_SPACED_SEED_MASK; opts.toggle_mask = DEFAULT_TOGGLE_MASK; opts.block_size = DEFAULT_BLOCK_SIZE; ParseCommandLine(argc, argv, opts); omp_set_num_threads(opts.threads); ProcessSequences(opts); return 0; } void ProcessSequences(Options &opts) { vector> sets(opts.n); #pragma omp parallel { bool have_work = true; BatchSequenceReader reader; Sequence sequence; while (have_work) { #pragma omp critical(batch_reading) have_work = reader.LoadBlock(std::cin, opts.block_size); if (have_work) while (reader.NextSequence(sequence)) ProcessSequence(sequence.seq, opts, sets); } } size_t sum_set_sizes = 0; for (auto &s : sets) { sum_set_sizes += s.size(); } sum_set_sizes++; // ensure non-zero estimate cout << (size_t) (sum_set_sizes * RANGE_SECTIONS * 1.0 / opts.n) << endl; } void ParseCommandLine(int argc, char **argv, Options &opts) { int opt; long long sig; while ((opt = getopt(argc, argv, "?hk:l:n:S:T:B:p:X")) != -1) { switch (opt) { case 'h' : case '?' : usage(0); break; case 'p' : sig = atoll(optarg); if (sig < 1) errx(EX_USAGE, "must have at least 1 thread"); opts.threads = sig; break; case 'B' : sig = atoll(optarg); if (sig < 1) errx(EX_USAGE, "block size must be positive"); opts.block_size = sig; break; case 'n' : sig = atoll(optarg); if (sig < 1) errx(EX_USAGE, "n must be positive integer"); if (sig > MAX_N) errx(EX_USAGE, "n must be no more than %d", MAX_N); opts.n = sig; break; case 'k' : sig = atoll(optarg); if (sig < 1) errx(EX_USAGE, "k must be positive integer"); opts.k = sig; break; case 'l' : sig = atoll(optarg); if (sig < 1) errx(EX_USAGE, "l must be positive integer"); if (sig > 31) errx(EX_USAGE, "l must be no more than 31"); opts.l = sig; break; case 'X' : opts.input_is_protein = true; break; case 'S' : opts.spaced_seed_mask = strtol(optarg, nullptr, 2); break; case 'T' : opts.toggle_mask = strtol(optarg, nullptr, 2); break; } } if (opts.spaced_seed_mask != DEFAULT_SPACED_SEED_MASK) ExpandSpacedSeedMask(opts.spaced_seed_mask, opts.input_is_protein ? BITS_PER_CHAR_PRO : BITS_PER_CHAR_DNA); if (opts.k == 0 || opts.l == 0) { cerr << "missing mandatory integer parameter" << endl; usage(); } if (opts.k < opts.l) { cerr << "k cannot be less than l" << endl; usage(); } } void usage(int exit_code) { cerr << "Usage: estimate_capacity " << endl << endl << "Options (*mandatory):" << endl << "* -k INT Set length of k-mers" << endl << "* -l INT Set length of minimizers" << endl << " -n INT Set maximum qualifying hash code" << endl << " -X Input sequences are proteins" << endl << " -S BITSTRING Spaced seed mask" << endl << " -T BITSTRING Minimizer ordering toggle mask" << endl << " -B INT Read block size" << endl << " -p INT Number of threads" << endl; exit(exit_code); } void ProcessSequence(string &seq, Options &opts, vector> &sets) { MinimizerScanner scanner(opts.k, opts.l, opts.spaced_seed_mask, ! opts.input_is_protein, opts.toggle_mask); // Add terminator for protein sequences if not already there if (opts.input_is_protein && seq.back() != '*') seq.push_back('*'); scanner.LoadSequence(seq); uint64_t *minimizer_ptr; while ((minimizer_ptr = scanner.NextMinimizer())) { if (scanner.is_ambiguous()) continue; uint64_t hash_code = MurmurHash3(*minimizer_ptr); if ((hash_code & RANGE_MASK) < opts.n) { #pragma omp critical(set_insert) sets[hash_code & RANGE_MASK].insert(*minimizer_ptr); } } } kraken2-2.1.2/src/hyperloglogplus-bias.h000066400000000000000000002156011404614725400202000ustar00rootroot00000000000000/* * hyperloglogbias.h * * Created on: Apr 25, 2015 * Author: fbreitwieser */ #ifndef HYPERLOGLOGBIAS_H_ #define HYPERLOGLOGBIAS_H_ // Bias data from Heule et al., 2015 // URL: https://docs.google.com/document/d/1gyjfMHy43U9OWBXxfaeG-3MjGzejW1dlpyMwEYAAWEI/view?fullscreen // experimentally determined threshold values for p - 4 static const uint32_t threshold[] = { 10, // precision 4 20, 40, 80, 220, 400, 900, 1800, 3100, 6500, 11500, 20000, 50000, 120000, 350000 // precision 18 }; const std::vector rawEstimateData[] = { // precision 4 { 11, 11.717, 12.207, 12.7896, 13.2882, 13.8204, 14.3772, 14.9342, 15.5202, 16.161, 16.7722, 17.4636, 18.0396, 18.6766, 19.3566, 20.0454, 20.7936, 21.4856, 22.2666, 22.9946, 23.766, 24.4692, 25.3638, 26.0764, 26.7864, 27.7602, 28.4814, 29.433, 30.2926, 31.0664, 31.9996, 32.7956, 33.5366, 34.5894, 35.5738, 36.2698, 37.3682, 38.0544, 39.2342, 40.0108, 40.7966, 41.9298, 42.8704, 43.6358, 44.5194, 45.773, 46.6772, 47.6174, 48.4888, 49.3304, 50.2506, 51.4996, 52.3824, 53.3078, 54.3984, 55.5838, 56.6618, 57.2174, 58.3514, 59.0802, 60.1482, 61.0376, 62.3598, 62.8078, 63.9744, 64.914, 65.781, 67.1806, 68.0594, 68.8446, 69.7928, 70.8248, 71.8324, 72.8598, 73.6246, 74.7014, 75.393, 76.6708, 77.2394, }, // precision 5 { 23, 23.1194, 23.8208, 24.2318, 24.77, 25.2436, 25.7774, 26.2848, 26.8224, 27.3742, 27.9336, 28.503, 29.0494, 29.6292, 30.2124, 30.798, 31.367, 31.9728, 32.5944, 33.217, 33.8438, 34.3696, 35.0956, 35.7044, 36.324, 37.0668, 37.6698, 38.3644, 39.049, 39.6918, 40.4146, 41.082, 41.687, 42.5398, 43.2462, 43.857, 44.6606, 45.4168, 46.1248, 46.9222, 47.6804, 48.447, 49.3454, 49.9594, 50.7636, 51.5776, 52.331, 53.19, 53.9676, 54.7564, 55.5314, 56.4442, 57.3708, 57.9774, 58.9624, 59.8796, 60.755, 61.472, 62.2076, 63.1024, 63.8908, 64.7338, 65.7728, 66.629, 67.413, 68.3266, 69.1524, 70.2642, 71.1806, 72.0566, 72.9192, 73.7598, 74.3516, 75.5802, 76.4386, 77.4916, 78.1524, 79.1892, 79.8414, 80.8798, 81.8376, 82.4698, 83.7656, 84.331, 85.5914, 86.6012, 87.7016, 88.5582, 89.3394, 90.3544, 91.4912, 92.308, 93.3552, 93.9746, 95.2052, 95.727, 97.1322, 98.3944, 98.7588, 100.242, 101.1914, 102.2538, 102.8776, 103.6292, 105.1932, 105.9152, 107.0868, 107.6728, 108.7144, 110.3114, 110.8716, 111.245, 112.7908, 113.7064, 114.636, 115.7464, 116.1788, 117.7464, 118.4896, 119.6166, 120.5082, 121.7798, 122.9028, 123.4426, 124.8854, 125.705, 126.4652, 128.3464, 128.3462, 130.0398, 131.0342, 131.0042, 132.4766, 133.511, 134.7252, 135.425, 136.5172, 138.0572, 138.6694, 139.3712, 140.8598, 141.4594, 142.554, 143.4006, 144.7374, 146.1634, 146.8994, 147.605, 147.9304, 149.1636, 150.2468, 151.5876, 152.2096, 153.7032, 154.7146, 155.807, 156.9228, 157.0372, 158.5852, }, // precision 6 { 46, 46.1902, 47.271, 47.8358, 48.8142, 49.2854, 50.317, 51.354, 51.8924, 52.9436, 53.4596, 54.5262, 55.6248, 56.1574, 57.2822, 57.837, 58.9636, 60.074, 60.7042, 61.7976, 62.4772, 63.6564, 64.7942, 65.5004, 66.686, 67.291, 68.5672, 69.8556, 70.4982, 71.8204, 72.4252, 73.7744, 75.0786, 75.8344, 77.0294, 77.8098, 79.0794, 80.5732, 81.1878, 82.5648, 83.2902, 84.6784, 85.3352, 86.8946, 88.3712, 89.0852, 90.499, 91.2686, 92.6844, 94.2234, 94.9732, 96.3356, 97.2286, 98.7262, 100.3284, 101.1048, 102.5962, 103.3562, 105.1272, 106.4184, 107.4974, 109.0822, 109.856, 111.48, 113.2834, 114.0208, 115.637, 116.5174, 118.0576, 119.7476, 120.427, 122.1326, 123.2372, 125.2788, 126.6776, 127.7926, 129.1952, 129.9564, 131.6454, 133.87, 134.5428, 136.2, 137.0294, 138.6278, 139.6782, 141.792, 143.3516, 144.2832, 146.0394, 147.0748, 148.4912, 150.849, 151.696, 153.5404, 154.073, 156.3714, 157.7216, 158.7328, 160.4208, 161.4184, 163.9424, 165.2772, 166.411, 168.1308, 168.769, 170.9258, 172.6828, 173.7502, 175.706, 176.3886, 179.0186, 180.4518, 181.927, 183.4172, 184.4114, 186.033, 188.5124, 189.5564, 191.6008, 192.4172, 193.8044, 194.997, 197.4548, 198.8948, 200.2346, 202.3086, 203.1548, 204.8842, 206.6508, 206.6772, 209.7254, 210.4752, 212.7228, 214.6614, 215.1676, 217.793, 218.0006, 219.9052, 221.66, 223.5588, 225.1636, 225.6882, 227.7126, 229.4502, 231.1978, 232.9756, 233.1654, 236.727, 238.1974, 237.7474, 241.1346, 242.3048, 244.1948, 245.3134, 246.879, 249.1204, 249.853, 252.6792, 253.857, 254.4486, 257.2362, 257.9534, 260.0286, 260.5632, 262.663, 264.723, 265.7566, 267.2566, 267.1624, 270.62, 272.8216, 273.2166, 275.2056, 276.2202, 278.3726, 280.3344, 281.9284, 283.9728, 284.1924, 286.4872, 287.587, 289.807, 291.1206, 292.769, 294.8708, 296.665, 297.1182, 299.4012, 300.6352, 302.1354, 304.1756, 306.1606, 307.3462, 308.5214, 309.4134, 310.8352, 313.9684, 315.837, 316.7796, 318.9858, }, // precision 7 { 92, 93.4934, 94.9758, 96.4574, 97.9718, 99.4954, 101.5302, 103.0756, 104.6374, 106.1782, 107.7888, 109.9522, 111.592, 113.2532, 114.9086, 116.5938, 118.9474, 120.6796, 122.4394, 124.2176, 125.9768, 128.4214, 130.2528, 132.0102, 133.8658, 135.7278, 138.3044, 140.1316, 142.093, 144.0032, 145.9092, 148.6306, 150.5294, 152.5756, 154.6508, 156.662, 159.552, 161.3724, 163.617, 165.5754, 167.7872, 169.8444, 172.7988, 174.8606, 177.2118, 179.3566, 181.4476, 184.5882, 186.6816, 189.0824, 191.0258, 193.6048, 196.4436, 198.7274, 200.957, 203.147, 205.4364, 208.7592, 211.3386, 213.781, 215.8028, 218.656, 221.6544, 223.996, 226.4718, 229.1544, 231.6098, 234.5956, 237.0616, 239.5758, 242.4878, 244.5244, 248.2146, 250.724, 252.8722, 255.5198, 258.0414, 261.941, 264.9048, 266.87, 269.4304, 272.028, 274.4708, 278.37, 281.0624, 283.4668, 286.5532, 289.4352, 293.2564, 295.2744, 298.2118, 300.7472, 304.1456, 307.2928, 309.7504, 312.5528, 315.979, 318.2102, 322.1834, 324.3494, 327.325, 330.6614, 332.903, 337.2544, 339.9042, 343.215, 345.2864, 348.0814, 352.6764, 355.301, 357.139, 360.658, 363.1732, 366.5902, 369.9538, 373.0828, 375.922, 378.9902, 382.7328, 386.4538, 388.1136, 391.2234, 394.0878, 396.708, 401.1556, 404.1852, 406.6372, 409.6822, 412.7796, 416.6078, 418.4916, 422.131, 424.5376, 428.1988, 432.211, 434.4502, 438.5282, 440.912, 444.0448, 447.7432, 450.8524, 453.7988, 456.7858, 458.8868, 463.9886, 466.5064, 468.9124, 472.6616, 475.4682, 478.582, 481.304, 485.2738, 488.6894, 490.329, 496.106, 497.6908, 501.1374, 504.5322, 506.8848, 510.3324, 513.4512, 516.179, 520.4412, 522.6066, 526.167, 528.7794, 533.379, 536.067, 538.46, 542.9116, 545.692, 547.9546, 552.493, 555.2722, 557.335, 562.449, 564.2014, 569.0738, 571.0974, 574.8564, 578.2996, 581.409, 583.9704, 585.8098, 589.6528, 594.5998, 595.958, 600.068, 603.3278, 608.2016, 609.9632, 612.864, 615.43, 620.7794, 621.272, 625.8644, 629.206, 633.219, 634.5154, 638.6102, }, // precision 8 { 184.2152, 187.2454, 190.2096, 193.6652, 196.6312, 199.6822, 203.249, 206.3296, 210.0038, 213.2074, 216.4612, 220.27, 223.5178, 227.4412, 230.8032, 234.1634, 238.1688, 241.6074, 245.6946, 249.2664, 252.8228, 257.0432, 260.6824, 264.9464, 268.6268, 272.2626, 276.8376, 280.4034, 284.8956, 288.8522, 292.7638, 297.3552, 301.3556, 305.7526, 309.9292, 313.8954, 318.8198, 322.7668, 327.298, 331.6688, 335.9466, 340.9746, 345.1672, 349.3474, 354.3028, 358.8912, 364.114, 368.4646, 372.9744, 378.4092, 382.6022, 387.843, 392.5684, 397.1652, 402.5426, 407.4152, 412.5388, 417.3592, 422.1366, 427.486, 432.3918, 437.5076, 442.509, 447.3834, 453.3498, 458.0668, 463.7346, 469.1228, 473.4528, 479.7, 484.644, 491.0518, 495.5774, 500.9068, 506.432, 512.1666, 517.434, 522.6644, 527.4894, 533.6312, 538.3804, 544.292, 550.5496, 556.0234, 562.8206, 566.6146, 572.4188, 579.117, 583.6762, 590.6576, 595.7864, 601.509, 607.5334, 612.9204, 619.772, 624.2924, 630.8654, 636.1836, 642.745, 649.1316, 655.0386, 660.0136, 666.6342, 671.6196, 678.1866, 684.4282, 689.3324, 695.4794, 702.5038, 708.129, 713.528, 720.3204, 726.463, 732.7928, 739.123, 744.7418, 751.2192, 756.5102, 762.6066, 769.0184, 775.2224, 781.4014, 787.7618, 794.1436, 798.6506, 805.6378, 811.766, 819.7514, 824.5776, 828.7322, 837.8048, 843.6302, 849.9336, 854.4798, 861.3388, 867.9894, 873.8196, 880.3136, 886.2308, 892.4588, 899.0816, 905.4076, 912.0064, 917.3878, 923.619, 929.998, 937.3482, 943.9506, 947.991, 955.1144, 962.203, 968.8222, 975.7324, 981.7826, 988.7666, 994.2648, 1000.3128, 1007.4082, 1013.7536, 1020.3376, 1026.7156, 1031.7478, 1037.4292, 1045.393, 1051.2278, 1058.3434, 1062.8726, 1071.884, 1076.806, 1082.9176, 1089.1678, 1095.5032, 1102.525, 1107.2264, 1115.315, 1120.93, 1127.252, 1134.1496, 1139.0408, 1147.5448, 1153.3296, 1158.1974, 1166.5262, 1174.3328, 1175.657, 1184.4222, 1190.9172, 1197.1292, 1204.4606, 1210.4578, 1218.8728, 1225.3336, 1226.6592, 1236.5768, 1241.363, 1249.4074, 1254.6566, 1260.8014, 1266.5454, 1274.5192, }, // precision 9 { 369, 374.8294, 381.2452, 387.6698, 394.1464, 400.2024, 406.8782, 413.6598, 420.462, 427.2826, 433.7102, 440.7416, 447.9366, 455.1046, 462.285, 469.0668, 476.306, 483.8448, 491.301, 498.9886, 506.2422, 513.8138, 521.7074, 529.7428, 537.8402, 545.1664, 553.3534, 561.594, 569.6886, 577.7876, 585.65, 594.228, 602.8036, 611.1666, 620.0818, 628.0824, 637.2574, 646.302, 655.1644, 664.0056, 672.3802, 681.7192, 690.5234, 700.2084, 708.831, 718.485, 728.1112, 737.4764, 746.76, 756.3368, 766.5538, 775.5058, 785.2646, 795.5902, 804.3818, 814.8998, 824.9532, 835.2062, 845.2798, 854.4728, 864.9582, 875.3292, 886.171, 896.781, 906.5716, 916.7048, 927.5322, 937.875, 949.3972, 958.3464, 969.7274, 980.2834, 992.1444, 1003.4264, 1013.0166, 1024.018, 1035.0438, 1046.34, 1057.6856, 1068.9836, 1079.0312, 1091.677, 1102.3188, 1113.4846, 1124.4424, 1135.739, 1147.1488, 1158.9202, 1169.406, 1181.5342, 1193.2834, 1203.8954, 1216.3286, 1226.2146, 1239.6684, 1251.9946, 1262.123, 1275.4338, 1285.7378, 1296.076, 1308.9692, 1320.4964, 1333.0998, 1343.9864, 1357.7754, 1368.3208, 1380.4838, 1392.7388, 1406.0758, 1416.9098, 1428.9728, 1440.9228, 1453.9292, 1462.617, 1476.05, 1490.2996, 1500.6128, 1513.7392, 1524.5174, 1536.6322, 1548.2584, 1562.3766, 1572.423, 1587.1232, 1596.5164, 1610.5938, 1622.5972, 1633.1222, 1647.7674, 1658.5044, 1671.57, 1683.7044, 1695.4142, 1708.7102, 1720.6094, 1732.6522, 1747.841, 1756.4072, 1769.9786, 1782.3276, 1797.5216, 1808.3186, 1819.0694, 1834.354, 1844.575, 1856.2808, 1871.1288, 1880.7852, 1893.9622, 1906.3418, 1920.6548, 1932.9302, 1945.8584, 1955.473, 1968.8248, 1980.6446, 1995.9598, 2008.349, 2019.8556, 2033.0334, 2044.0206, 2059.3956, 2069.9174, 2082.6084, 2093.7036, 2106.6108, 2118.9124, 2132.301, 2144.7628, 2159.8422, 2171.0212, 2183.101, 2193.5112, 2208.052, 2221.3194, 2233.3282, 2247.295, 2257.7222, 2273.342, 2286.5638, 2299.6786, 2310.8114, 2322.3312, 2335.516, 2349.874, 2363.5968, 2373.865, 2387.1918, 2401.8328, 2414.8496, 2424.544, 2436.7592, 2447.1682, 2464.1958, 2474.3438, 2489.0006, 2497.4526, 2513.6586, 2527.19, 2540.7028, 2553.768, }, // precision 10 { 738.1256, 750.4234, 763.1064, 775.4732, 788.4636, 801.0644, 814.488, 827.9654, 841.0832, 854.7864, 868.1992, 882.2176, 896.5228, 910.1716, 924.7752, 938.899, 953.6126, 968.6492, 982.9474, 998.5214, 1013.1064, 1028.6364, 1044.2468, 1059.4588, 1075.3832, 1091.0584, 1106.8606, 1123.3868, 1139.5062, 1156.1862, 1172.463, 1189.339, 1206.1936, 1223.1292, 1240.1854, 1257.2908, 1275.3324, 1292.8518, 1310.5204, 1328.4854, 1345.9318, 1364.552, 1381.4658, 1400.4256, 1419.849, 1438.152, 1456.8956, 1474.8792, 1494.118, 1513.62, 1532.5132, 1551.9322, 1570.7726, 1590.6086, 1610.5332, 1630.5918, 1650.4294, 1669.7662, 1690.4106, 1710.7338, 1730.9012, 1750.4486, 1770.1556, 1791.6338, 1812.7312, 1833.6264, 1853.9526, 1874.8742, 1896.8326, 1918.1966, 1939.5594, 1961.07, 1983.037, 2003.1804, 2026.071, 2047.4884, 2070.0848, 2091.2944, 2114.333, 2135.9626, 2158.2902, 2181.0814, 2202.0334, 2224.4832, 2246.39, 2269.7202, 2292.1714, 2314.2358, 2338.9346, 2360.891, 2384.0264, 2408.3834, 2430.1544, 2454.8684, 2476.9896, 2501.4368, 2522.8702, 2548.0408, 2570.6738, 2593.5208, 2617.0158, 2640.2302, 2664.0962, 2687.4986, 2714.2588, 2735.3914, 2759.6244, 2781.8378, 2808.0072, 2830.6516, 2856.2454, 2877.2136, 2903.4546, 2926.785, 2951.2294, 2976.468, 3000.867, 3023.6508, 3049.91, 3073.5984, 3098.162, 3121.5564, 3146.2328, 3170.9484, 3195.5902, 3221.3346, 3242.7032, 3271.6112, 3296.5546, 3317.7376, 3345.072, 3369.9518, 3394.326, 3418.1818, 3444.6926, 3469.086, 3494.2754, 3517.8698, 3544.248, 3565.3768, 3588.7234, 3616.979, 3643.7504, 3668.6812, 3695.72, 3719.7392, 3742.6224, 3770.4456, 3795.6602, 3819.9058, 3844.002, 3869.517, 3895.6824, 3920.8622, 3947.1364, 3973.985, 3995.4772, 4021.62, 4046.628, 4074.65, 4096.2256, 4121.831, 4146.6406, 4173.276, 4195.0744, 4223.9696, 4251.3708, 4272.9966, 4300.8046, 4326.302, 4353.1248, 4374.312, 4403.0322, 4426.819, 4450.0598, 4478.5206, 4504.8116, 4528.8928, 4553.9584, 4578.8712, 4603.8384, 4632.3872, 4655.5128, 4675.821, 4704.6222, 4731.9862, 4755.4174, 4781.2628, 4804.332, 4832.3048, 4862.8752, 4883.4148, 4906.9544, 4935.3516, 4954.3532, 4984.0248, 5011.217, 5035.3258, 5057.3672, 5084.1828, }, // precision 11 { 1477, 1501.6014, 1526.5802, 1551.7942, 1577.3042, 1603.2062, 1629.8402, 1656.2292, 1682.9462, 1709.9926, 1737.3026, 1765.4252, 1793.0578, 1821.6092, 1849.626, 1878.5568, 1908.527, 1937.5154, 1967.1874, 1997.3878, 2027.37, 2058.1972, 2089.5728, 2120.1012, 2151.9668, 2183.292, 2216.0772, 2247.8578, 2280.6562, 2313.041, 2345.714, 2380.3112, 2414.1806, 2447.9854, 2481.656, 2516.346, 2551.5154, 2586.8378, 2621.7448, 2656.6722, 2693.5722, 2729.1462, 2765.4124, 2802.8728, 2838.898, 2876.408, 2913.4926, 2951.4938, 2989.6776, 3026.282, 3065.7704, 3104.1012, 3143.7388, 3181.6876, 3221.1872, 3261.5048, 3300.0214, 3339.806, 3381.409, 3421.4144, 3461.4294, 3502.2286, 3544.651, 3586.6156, 3627.337, 3670.083, 3711.1538, 3753.5094, 3797.01, 3838.6686, 3882.1678, 3922.8116, 3967.9978, 4009.9204, 4054.3286, 4097.5706, 4140.6014, 4185.544, 4229.5976, 4274.583, 4316.9438, 4361.672, 4406.2786, 4451.8628, 4496.1834, 4543.505, 4589.1816, 4632.5188, 4678.2294, 4724.8908, 4769.0194, 4817.052, 4861.4588, 4910.1596, 4956.4344, 5002.5238, 5048.13, 5093.6374, 5142.8162, 5187.7894, 5237.3984, 5285.6078, 5331.0858, 5379.1036, 5428.6258, 5474.6018, 5522.7618, 5571.5822, 5618.59, 5667.9992, 5714.88, 5763.454, 5808.6982, 5860.3644, 5910.2914, 5953.571, 6005.9232, 6055.1914, 6104.5882, 6154.5702, 6199.7036, 6251.1764, 6298.7596, 6350.0302, 6398.061, 6448.4694, 6495.933, 6548.0474, 6597.7166, 6646.9416, 6695.9208, 6742.6328, 6793.5276, 6842.1934, 6894.2372, 6945.3864, 6996.9228, 7044.2372, 7094.1374, 7142.2272, 7192.2942, 7238.8338, 7288.9006, 7344.0908, 7394.8544, 7443.5176, 7490.4148, 7542.9314, 7595.6738, 7641.9878, 7694.3688, 7743.0448, 7797.522, 7845.53, 7899.594, 7950.3132, 7996.455, 8050.9442, 8092.9114, 8153.1374, 8197.4472, 8252.8278, 8301.8728, 8348.6776, 8401.4698, 8453.551, 8504.6598, 8553.8944, 8604.1276, 8657.6514, 8710.3062, 8758.908, 8807.8706, 8862.1702, 8910.4668, 8960.77, 9007.2766, 9063.164, 9121.0534, 9164.1354, 9218.1594, 9267.767, 9319.0594, 9372.155, 9419.7126, 9474.3722, 9520.1338, 9572.368, 9622.7702, 9675.8448, 9726.5396, 9778.7378, 9827.6554, 9878.1922, 9928.7782, 9978.3984, 10026.578, 10076.5626, 10137.1618, 10177.5244, 10229.9176, }, // precision 12 { 2954, 3003.4782, 3053.3568, 3104.3666, 3155.324, 3206.9598, 3259.648, 3312.539, 3366.1474, 3420.2576, 3474.8376, 3530.6076, 3586.451, 3643.38, 3700.4104, 3757.5638, 3815.9676, 3875.193, 3934.838, 3994.8548, 4055.018, 4117.1742, 4178.4482, 4241.1294, 4304.4776, 4367.4044, 4431.8724, 4496.3732, 4561.4304, 4627.5326, 4693.949, 4761.5532, 4828.7256, 4897.6182, 4965.5186, 5034.4528, 5104.865, 5174.7164, 5244.6828, 5316.6708, 5387.8312, 5459.9036, 5532.476, 5604.8652, 5679.6718, 5753.757, 5830.2072, 5905.2828, 5980.0434, 6056.6264, 6134.3192, 6211.5746, 6290.0816, 6367.1176, 6447.9796, 6526.5576, 6606.1858, 6686.9144, 6766.1142, 6847.0818, 6927.9664, 7010.9096, 7091.0816, 7175.3962, 7260.3454, 7344.018, 7426.4214, 7511.3106, 7596.0686, 7679.8094, 7765.818, 7852.4248, 7936.834, 8022.363, 8109.5066, 8200.4554, 8288.5832, 8373.366, 8463.4808, 8549.7682, 8642.0522, 8728.3288, 8820.9528, 8907.727, 9001.0794, 9091.2522, 9179.988, 9269.852, 9362.6394, 9453.642, 9546.9024, 9640.6616, 9732.6622, 9824.3254, 9917.7484, 10007.9392, 10106.7508, 10196.2152, 10289.8114, 10383.5494, 10482.3064, 10576.8734, 10668.7872, 10764.7156, 10862.0196, 10952.793, 11049.9748, 11146.0702, 11241.4492, 11339.2772, 11434.2336, 11530.741, 11627.6136, 11726.311, 11821.5964, 11918.837, 12015.3724, 12113.0162, 12213.0424, 12306.9804, 12408.4518, 12504.8968, 12604.586, 12700.9332, 12798.705, 12898.5142, 12997.0488, 13094.788, 13198.475, 13292.7764, 13392.9698, 13486.8574, 13590.1616, 13686.5838, 13783.6264, 13887.2638, 13992.0978, 14081.0844, 14189.9956, 14280.0912, 14382.4956, 14486.4384, 14588.1082, 14686.2392, 14782.276, 14888.0284, 14985.1864, 15088.8596, 15187.0998, 15285.027, 15383.6694, 15495.8266, 15591.3736, 15694.2008, 15790.3246, 15898.4116, 15997.4522, 16095.5014, 16198.8514, 16291.7492, 16402.6424, 16499.1266, 16606.2436, 16697.7186, 16796.3946, 16902.3376, 17005.7672, 17100.814, 17206.8282, 17305.8262, 17416.0744, 17508.4092, 17617.0178, 17715.4554, 17816.758, 17920.1748, 18012.9236, 18119.7984, 18223.2248, 18324.2482, 18426.6276, 18525.0932, 18629.8976, 18733.2588, 18831.0466, 18940.1366, 19032.2696, 19131.729, 19243.4864, 19349.6932, 19442.866, 19547.9448, 19653.2798, 19754.4034, 19854.0692, 19965.1224, 20065.1774, 20158.2212, 20253.353, 20366.3264, 20463.22, }, // precision 13 { 5908.5052, 6007.2672, 6107.347, 6208.5794, 6311.2622, 6414.5514, 6519.3376, 6625.6952, 6732.5988, 6841.3552, 6950.5972, 7061.3082, 7173.5646, 7287.109, 7401.8216, 7516.4344, 7633.3802, 7751.2962, 7870.3784, 7990.292, 8110.79, 8233.4574, 8356.6036, 8482.2712, 8607.7708, 8735.099, 8863.1858, 8993.4746, 9123.8496, 9255.6794, 9388.5448, 9522.7516, 9657.3106, 9792.6094, 9930.5642, 10068.794, 10206.7256, 10347.81, 10490.3196, 10632.0778, 10775.9916, 10920.4662, 11066.124, 11213.073, 11358.0362, 11508.1006, 11659.1716, 11808.7514, 11959.4884, 12112.1314, 12265.037, 12420.3756, 12578.933, 12734.311, 12890.0006, 13047.2144, 13207.3096, 13368.5144, 13528.024, 13689.847, 13852.7528, 14018.3168, 14180.5372, 14346.9668, 14513.5074, 14677.867, 14846.2186, 15017.4186, 15184.9716, 15356.339, 15529.2972, 15697.3578, 15871.8686, 16042.187, 16216.4094, 16389.4188, 16565.9126, 16742.3272, 16919.0042, 17094.7592, 17273.965, 17451.8342, 17634.4254, 17810.5984, 17988.9242, 18171.051, 18354.7938, 18539.466, 18721.0408, 18904.9972, 19081.867, 19271.9118, 19451.8694, 19637.9816, 19821.2922, 20013.1292, 20199.3858, 20387.8726, 20572.9514, 20770.7764, 20955.1714, 21144.751, 21329.9952, 21520.709, 21712.7016, 21906.3868, 22096.2626, 22286.0524, 22475.051, 22665.5098, 22862.8492, 23055.5294, 23249.6138, 23437.848, 23636.273, 23826.093, 24020.3296, 24213.3896, 24411.7392, 24602.9614, 24805.7952, 24998.1552, 25193.9588, 25389.0166, 25585.8392, 25780.6976, 25981.2728, 26175.977, 26376.5252, 26570.1964, 26773.387, 26962.9812, 27163.0586, 27368.164, 27565.0534, 27758.7428, 27961.1276, 28163.2324, 28362.3816, 28565.7668, 28758.644, 28956.9768, 29163.4722, 29354.7026, 29561.1186, 29767.9948, 29959.9986, 30164.0492, 30366.9818, 30562.5338, 30762.9928, 30976.1592, 31166.274, 31376.722, 31570.3734, 31770.809, 31974.8934, 32179.5286, 32387.5442, 32582.3504, 32794.076, 32989.9528, 33191.842, 33392.4684, 33595.659, 33801.8672, 34000.3414, 34200.0922, 34402.6792, 34610.0638, 34804.0084, 35011.13, 35218.669, 35418.6634, 35619.0792, 35830.6534, 36028.4966, 36229.7902, 36438.6422, 36630.7764, 36833.3102, 37048.6728, 37247.3916, 37453.5904, 37669.3614, 37854.5526, 38059.305, 38268.0936, 38470.2516, 38674.7064, 38876.167, 39068.3794, 39281.9144, 39492.8566, 39684.8628, 39898.4108, 40093.1836, 40297.6858, 40489.7086, 40717.2424, }, // precision 14 { 11817.475, 12015.0046, 12215.3792, 12417.7504, 12623.1814, 12830.0086, 13040.0072, 13252.503, 13466.178, 13683.2738, 13902.0344, 14123.9798, 14347.394, 14573.7784, 14802.6894, 15033.6824, 15266.9134, 15502.8624, 15741.4944, 15980.7956, 16223.8916, 16468.6316, 16715.733, 16965.5726, 17217.204, 17470.666, 17727.8516, 17986.7886, 18247.6902, 18510.9632, 18775.304, 19044.7486, 19314.4408, 19587.202, 19862.2576, 20135.924, 20417.0324, 20697.9788, 20979.6112, 21265.0274, 21550.723, 21841.6906, 22132.162, 22428.1406, 22722.127, 23020.5606, 23319.7394, 23620.4014, 23925.2728, 24226.9224, 24535.581, 24845.505, 25155.9618, 25470.3828, 25785.9702, 26103.7764, 26420.4132, 26742.0186, 27062.8852, 27388.415, 27714.6024, 28042.296, 28365.4494, 28701.1526, 29031.8008, 29364.2156, 29704.497, 30037.1458, 30380.111, 30723.8168, 31059.5114, 31404.9498, 31751.6752, 32095.2686, 32444.7792, 32794.767, 33145.204, 33498.4226, 33847.6502, 34209.006, 34560.849, 34919.4838, 35274.9778, 35635.1322, 35996.3266, 36359.1394, 36722.8266, 37082.8516, 37447.7354, 37815.9606, 38191.0692, 38559.4106, 38924.8112, 39294.6726, 39663.973, 40042.261, 40416.2036, 40779.2036, 41161.6436, 41540.9014, 41921.1998, 42294.7698, 42678.5264, 43061.3464, 43432.375, 43818.432, 44198.6598, 44583.0138, 44970.4794, 45353.924, 45729.858, 46118.2224, 46511.5724, 46900.7386, 47280.6964, 47668.1472, 48055.6796, 48446.9436, 48838.7146, 49217.7296, 49613.7796, 50010.7508, 50410.0208, 50793.7886, 51190.2456, 51583.1882, 51971.0796, 52376.5338, 52763.319, 53165.5534, 53556.5594, 53948.2702, 54346.352, 54748.7914, 55138.577, 55543.4824, 55941.1748, 56333.7746, 56745.1552, 57142.7944, 57545.2236, 57935.9956, 58348.5268, 58737.5474, 59158.5962, 59542.6896, 59958.8004, 60349.3788, 60755.0212, 61147.6144, 61548.194, 61946.0696, 62348.6042, 62763.603, 63162.781, 63560.635, 63974.3482, 64366.4908, 64771.5876, 65176.7346, 65597.3916, 65995.915, 66394.0384, 66822.9396, 67203.6336, 67612.2032, 68019.0078, 68420.0388, 68821.22, 69235.8388, 69640.0724, 70055.155, 70466.357, 70863.4266, 71276.2482, 71677.0306, 72080.2006, 72493.0214, 72893.5952, 73314.5856, 73714.9852, 74125.3022, 74521.2122, 74933.6814, 75341.5904, 75743.0244, 76166.0278, 76572.1322, 76973.1028, 77381.6284, 77800.6092, 78189.328, 78607.0962, 79012.2508, 79407.8358, 79825.725, 80238.701, 80646.891, 81035.6436, 81460.0448, 81876.3884, }, // precision 15 { 23635.0036, 24030.8034, 24431.4744, 24837.1524, 25246.7928, 25661.326, 26081.3532, 26505.2806, 26933.9892, 27367.7098, 27805.318, 28248.799, 28696.4382, 29148.8244, 29605.5138, 30066.8668, 30534.2344, 31006.32, 31480.778, 31962.2418, 32447.3324, 32938.0232, 33432.731, 33930.728, 34433.9896, 34944.1402, 35457.5588, 35974.5958, 36497.3296, 37021.9096, 37554.326, 38088.0826, 38628.8816, 39171.3192, 39723.2326, 40274.5554, 40832.3142, 41390.613, 41959.5908, 42532.5466, 43102.0344, 43683.5072, 44266.694, 44851.2822, 45440.7862, 46038.0586, 46640.3164, 47241.064, 47846.155, 48454.7396, 49076.9168, 49692.542, 50317.4778, 50939.65, 51572.5596, 52210.2906, 52843.7396, 53481.3996, 54127.236, 54770.406, 55422.6598, 56078.7958, 56736.7174, 57397.6784, 58064.5784, 58730.308, 59404.9784, 60077.0864, 60751.9158, 61444.1386, 62115.817, 62808.7742, 63501.4774, 64187.5454, 64883.6622, 65582.7468, 66274.5318, 66976.9276, 67688.7764, 68402.138, 69109.6274, 69822.9706, 70543.6108, 71265.5202, 71983.3848, 72708.4656, 73433.384, 74158.4664, 74896.4868, 75620.9564, 76362.1434, 77098.3204, 77835.7662, 78582.6114, 79323.9902, 80067.8658, 80814.9246, 81567.0136, 82310.8536, 83061.9952, 83821.4096, 84580.8608, 85335.547, 86092.5802, 86851.6506, 87612.311, 88381.2016, 89146.3296, 89907.8974, 90676.846, 91451.4152, 92224.5518, 92995.8686, 93763.5066, 94551.2796, 95315.1944, 96096.1806, 96881.0918, 97665.679, 98442.68, 99229.3002, 100011.0994, 100790.6386, 101580.1564, 102377.7484, 103152.1392, 103944.2712, 104730.216, 105528.6336, 106324.9398, 107117.6706, 107890.3988, 108695.2266, 109485.238, 110294.7876, 111075.0958, 111878.0496, 112695.2864, 113464.5486, 114270.0474, 115068.608, 115884.3626, 116673.2588, 117483.3716, 118275.097, 119085.4092, 119879.2808, 120687.5868, 121499.9944, 122284.916, 123095.9254, 123912.5038, 124709.0454, 125503.7182, 126323.259, 127138.9412, 127943.8294, 128755.646, 129556.5354, 130375.3298, 131161.4734, 131971.1962, 132787.5458, 133588.1056, 134431.351, 135220.2906, 136023.398, 136846.6558, 137667.0004, 138463.663, 139283.7154, 140074.6146, 140901.3072, 141721.8548, 142543.2322, 143356.1096, 144173.7412, 144973.0948, 145794.3162, 146609.5714, 147420.003, 148237.9784, 149050.5696, 149854.761, 150663.1966, 151494.0754, 152313.1416, 153112.6902, 153935.7206, 154746.9262, 155559.547, 156401.9746, 157228.7036, 158008.7254, 158820.75, 159646.9184, 160470.4458, 161279.5348, 162093.3114, 162918.542, 163729.2842, }, // precision 16 { 47271, 48062.3584, 48862.7074, 49673.152, 50492.8416, 51322.9514, 52161.03, 53009.407, 53867.6348, 54734.206, 55610.5144, 56496.2096, 57390.795, 58297.268, 59210.6448, 60134.665, 61068.0248, 62010.4472, 62962.5204, 63923.5742, 64895.0194, 65876.4182, 66862.6136, 67862.6968, 68868.8908, 69882.8544, 70911.271, 71944.0924, 72990.0326, 74040.692, 75100.6336, 76174.7826, 77252.5998, 78340.2974, 79438.2572, 80545.4976, 81657.2796, 82784.6336, 83915.515, 85059.7362, 86205.9368, 87364.4424, 88530.3358, 89707.3744, 90885.9638, 92080.197, 93275.5738, 94479.391, 95695.918, 96919.2236, 98148.4602, 99382.3474, 100625.6974, 101878.0284, 103141.6278, 104409.4588, 105686.2882, 106967.5402, 108261.6032, 109548.1578, 110852.0728, 112162.231, 113479.0072, 114806.2626, 116137.9072, 117469.5048, 118813.5186, 120165.4876, 121516.2556, 122875.766, 124250.5444, 125621.2222, 127003.2352, 128387.848, 129775.2644, 131181.7776, 132577.3086, 133979.9458, 135394.1132, 136800.9078, 138233.217, 139668.5308, 141085.212, 142535.2122, 143969.0684, 145420.2872, 146878.1542, 148332.7572, 149800.3202, 151269.66, 152743.6104, 154213.0948, 155690.288, 157169.4246, 158672.1756, 160160.059, 161650.6854, 163145.7772, 164645.6726, 166159.1952, 167682.1578, 169177.3328, 170700.0118, 172228.8964, 173732.6664, 175265.5556, 176787.799, 178317.111, 179856.6914, 181400.865, 182943.4612, 184486.742, 186033.4698, 187583.7886, 189148.1868, 190688.4526, 192250.1926, 193810.9042, 195354.2972, 196938.7682, 198493.5898, 200079.2824, 201618.912, 203205.5492, 204765.5798, 206356.1124, 207929.3064, 209498.7196, 211086.229, 212675.1324, 214256.7892, 215826.2392, 217412.8474, 218995.6724, 220618.6038, 222207.1166, 223781.0364, 225387.4332, 227005.7928, 228590.4336, 230217.8738, 231805.1054, 233408.9, 234995.3432, 236601.4956, 238190.7904, 239817.2548, 241411.2832, 243002.4066, 244640.1884, 246255.3128, 247849.3508, 249479.9734, 251106.8822, 252705.027, 254332.9242, 255935.129, 257526.9014, 259154.772, 260777.625, 262390.253, 264004.4906, 265643.59, 267255.4076, 268873.426, 270470.7252, 272106.4804, 273722.4456, 275337.794, 276945.7038, 278592.9154, 280204.3726, 281841.1606, 283489.171, 285130.1716, 286735.3362, 288364.7164, 289961.1814, 291595.5524, 293285.683, 294899.6668, 296499.3434, 298128.0462, 299761.8946, 301394.2424, 302997.6748, 304615.1478, 306269.7724, 307886.114, 309543.1028, 311153.2862, 312782.8546, 314421.2008, 316033.2438, 317692.9636, 319305.2648, 320948.7406, 322566.3364, 324228.4224, 325847.1542, }, // precision 17 { 94542, 96125.811, 97728.019, 99348.558, 100987.9705, 102646.7565, 104324.5125, 106021.7435, 107736.7865, 109469.272, 111223.9465, 112995.219, 114787.432, 116593.152, 118422.71, 120267.2345, 122134.6765, 124020.937, 125927.2705, 127851.255, 129788.9485, 131751.016, 133726.8225, 135722.592, 137736.789, 139770.568, 141821.518, 143891.343, 145982.1415, 148095.387, 150207.526, 152355.649, 154515.6415, 156696.05, 158887.7575, 161098.159, 163329.852, 165569.053, 167837.4005, 170121.6165, 172420.4595, 174732.6265, 177062.77, 179412.502, 181774.035, 184151.939, 186551.6895, 188965.691, 191402.8095, 193857.949, 196305.0775, 198774.6715, 201271.2585, 203764.78, 206299.3695, 208818.1365, 211373.115, 213946.7465, 216532.076, 219105.541, 221714.5375, 224337.5135, 226977.5125, 229613.0655, 232270.2685, 234952.2065, 237645.3555, 240331.1925, 243034.517, 245756.0725, 248517.6865, 251232.737, 254011.3955, 256785.995, 259556.44, 262368.335, 265156.911, 267965.266, 270785.583, 273616.0495, 276487.4835, 279346.639, 282202.509, 285074.3885, 287942.2855, 290856.018, 293774.0345, 296678.5145, 299603.6355, 302552.6575, 305492.9785, 308466.8605, 311392.581, 314347.538, 317319.4295, 320285.9785, 323301.7325, 326298.3235, 329301.3105, 332301.987, 335309.791, 338370.762, 341382.923, 344431.1265, 347464.1545, 350507.28, 353619.2345, 356631.2005, 359685.203, 362776.7845, 365886.488, 368958.2255, 372060.6825, 375165.4335, 378237.935, 381328.311, 384430.5225, 387576.425, 390683.242, 393839.648, 396977.8425, 400101.9805, 403271.296, 406409.8425, 409529.5485, 412678.7, 415847.423, 419020.8035, 422157.081, 425337.749, 428479.6165, 431700.902, 434893.1915, 438049.582, 441210.5415, 444379.2545, 447577.356, 450741.931, 453959.548, 457137.0935, 460329.846, 463537.4815, 466732.3345, 469960.5615, 473164.681, 476347.6345, 479496.173, 482813.1645, 486025.6995, 489249.4885, 492460.1945, 495675.8805, 498908.0075, 502131.802, 505374.3855, 508550.9915, 511806.7305, 515026.776, 518217.0005, 521523.9855, 524705.9855, 527950.997, 531210.0265, 534472.497, 537750.7315, 540926.922, 544207.094, 547429.4345, 550666.3745, 553975.3475, 557150.7185, 560399.6165, 563662.697, 566916.7395, 570146.1215, 573447.425, 576689.6245, 579874.5745, 583202.337, 586503.0255, 589715.635, 592910.161, 596214.3885, 599488.035, 602740.92, 605983.0685, 609248.67, 612491.3605, 615787.912, 619107.5245, 622307.9555, 625577.333, 628840.4385, 632085.2155, 635317.6135, 638691.7195, 641887.467, 645139.9405, 648441.546, 651666.252, 654941.845, }, // precision 18 { 189084, 192250.913, 195456.774, 198696.946, 201977.762, 205294.444, 208651.754, 212042.099, 215472.269, 218941.91, 222443.912, 225996.845, 229568.199, 233193.568, 236844.457, 240543.233, 244279.475, 248044.27, 251854.588, 255693.2, 259583.619, 263494.621, 267445.385, 271454.061, 275468.769, 279549.456, 283646.446, 287788.198, 291966.099, 296181.164, 300431.469, 304718.618, 309024.004, 313393.508, 317760.803, 322209.731, 326675.061, 331160.627, 335654.47, 340241.442, 344841.833, 349467.132, 354130.629, 358819.432, 363574.626, 368296.587, 373118.482, 377914.93, 382782.301, 387680.669, 392601.981, 397544.323, 402529.115, 407546.018, 412593.658, 417638.657, 422762.865, 427886.169, 433017.167, 438213.273, 443441.254, 448692.421, 453937.533, 459239.049, 464529.569, 469910.083, 475274.03, 480684.473, 486070.26, 491515.237, 496995.651, 502476.617, 507973.609, 513497.19, 519083.233, 524726.509, 530305.505, 535945.728, 541584.404, 547274.055, 552967.236, 558667.862, 564360.216, 570128.148, 575965.08, 581701.952, 587532.523, 593361.144, 599246.128, 605033.418, 610958.779, 616837.117, 622772.818, 628672.04, 634675.369, 640574.831, 646585.739, 652574.547, 658611.217, 664642.684, 670713.914, 676737.681, 682797.313, 688837.897, 694917.874, 701009.882, 707173.648, 713257.254, 719415.392, 725636.761, 731710.697, 737906.209, 744103.074, 750313.39, 756504.185, 762712.579, 768876.985, 775167.859, 781359, 787615.959, 793863.597, 800245.477, 806464.582, 812785.294, 819005.925, 825403.057, 831676.197, 837936.284, 844266.968, 850642.711, 856959.756, 863322.774, 869699.931, 876102.478, 882355.787, 888694.463, 895159.952, 901536.143, 907872.631, 914293.672, 920615.14, 927130.974, 933409.404, 939922.178, 946331.47, 952745.93, 959209.264, 965590.224, 972077.284, 978501.961, 984953.19, 991413.271, 997817.479, 1004222.658, 1010725.676, 1017177.138, 1023612.529, 1030098.236, 1036493.719, 1043112.207, 1049537.036, 1056008.096, 1062476.184, 1068942.337, 1075524.95, 1081932.864, 1088426.025, 1094776.005, 1101327.448, 1107901.673, 1114423.639, 1120884.602, 1127324.923, 1133794.24, 1140328.886, 1146849.376, 1153346.682, 1159836.502, 1166478.703, 1172953.304, 1179391.502, 1185950.982, 1192544.052, 1198913.41, 1205430.994, 1212015.525, 1218674.042, 1225121.683, 1231551.101, 1238126.379, 1244673.795, 1251260.649, 1257697.86, 1264320.983, 1270736.319, 1277274.694, 1283804.95, 1290211.514, 1296858.568, 1303455.691, } }; const std::vector biasData[] = { // precision 4 { 10, 9.717, 9.207, 8.7896, 8.2882, 7.8204, 7.3772, 6.9342, 6.5202, 6.161, 5.7722, 5.4636, 5.0396, 4.6766, 4.3566, 4.0454, 3.7936, 3.4856, 3.2666, 2.9946, 2.766, 2.4692, 2.3638, 2.0764, 1.7864, 1.7602, 1.4814, 1.433, 1.2926, 1.0664, 0.999600000000001, 0.7956, 0.5366, 0.589399999999998, 0.573799999999999, 0.269799999999996, 0.368200000000002, 0.0544000000000011, 0.234200000000001, 0.0108000000000033, -0.203400000000002, -0.0701999999999998, -0.129600000000003, -0.364199999999997, -0.480600000000003, -0.226999999999997, -0.322800000000001, -0.382599999999996, -0.511200000000002, -0.669600000000003, -0.749400000000001, -0.500399999999999, -0.617600000000003, -0.6922, -0.601599999999998, -0.416200000000003, -0.338200000000001, -0.782600000000002, -0.648600000000002, -0.919800000000002, -0.851799999999997, -0.962400000000002, -0.6402, -1.1922, -1.0256, -1.086, -1.21899999999999, -0.819400000000002, -0.940600000000003, -1.1554, -1.2072, -1.1752, -1.16759999999999, -1.14019999999999, -1.3754, -1.29859999999999, -1.607, -1.3292, -1.7606, }, // precision 5 { 22, 21.1194, 20.8208, 20.2318, 19.77, 19.2436, 18.7774, 18.2848, 17.8224, 17.3742, 16.9336, 16.503, 16.0494, 15.6292, 15.2124, 14.798, 14.367, 13.9728, 13.5944, 13.217, 12.8438, 12.3696, 12.0956, 11.7044, 11.324, 11.0668, 10.6698, 10.3644, 10.049, 9.6918, 9.4146, 9.082, 8.687, 8.5398, 8.2462, 7.857, 7.6606, 7.4168, 7.1248, 6.9222, 6.6804, 6.447, 6.3454, 5.9594, 5.7636, 5.5776, 5.331, 5.19, 4.9676, 4.7564, 4.5314, 4.4442, 4.3708, 3.9774, 3.9624, 3.8796, 3.755, 3.472, 3.2076, 3.1024, 2.8908, 2.7338, 2.7728, 2.629, 2.413, 2.3266, 2.1524, 2.2642, 2.1806, 2.0566, 1.9192, 1.7598, 1.3516, 1.5802, 1.43859999999999, 1.49160000000001, 1.1524, 1.1892, 0.841399999999993, 0.879800000000003, 0.837599999999995, 0.469800000000006, 0.765600000000006, 0.331000000000003, 0.591399999999993, 0.601200000000006, 0.701599999999999, 0.558199999999999, 0.339399999999998, 0.354399999999998, 0.491200000000006, 0.308000000000007, 0.355199999999996, -0.0254000000000048, 0.205200000000005, -0.272999999999996, 0.132199999999997, 0.394400000000005, -0.241200000000006, 0.242000000000004, 0.191400000000002, 0.253799999999998, -0.122399999999999, -0.370800000000003, 0.193200000000004, -0.0848000000000013, 0.0867999999999967, -0.327200000000005, -0.285600000000002, 0.311400000000006, -0.128399999999999, -0.754999999999995, -0.209199999999996, -0.293599999999998, -0.364000000000004, -0.253600000000006, -0.821200000000005, -0.253600000000006, -0.510400000000004, -0.383399999999995, -0.491799999999998, -0.220200000000006, -0.0972000000000008, -0.557400000000001, -0.114599999999996, -0.295000000000002, -0.534800000000004, 0.346399999999988, -0.65379999999999, 0.0398000000000138, 0.0341999999999985, -0.995800000000003, -0.523400000000009, -0.489000000000004, -0.274799999999999, -0.574999999999989, -0.482799999999997, 0.0571999999999946, -0.330600000000004, -0.628800000000012, -0.140199999999993, -0.540600000000012, -0.445999999999998, -0.599400000000003, -0.262599999999992, 0.163399999999996, -0.100599999999986, -0.39500000000001, -1.06960000000001, -0.836399999999998, -0.753199999999993, -0.412399999999991, -0.790400000000005, -0.29679999999999, -0.28540000000001, -0.193000000000012, -0.0772000000000048, -0.962799999999987, -0.414800000000014, }, // precision 6 { 45, 44.1902, 43.271, 42.8358, 41.8142, 41.2854, 40.317, 39.354, 38.8924, 37.9436, 37.4596, 36.5262, 35.6248, 35.1574, 34.2822, 33.837, 32.9636, 32.074, 31.7042, 30.7976, 30.4772, 29.6564, 28.7942, 28.5004, 27.686, 27.291, 26.5672, 25.8556, 25.4982, 24.8204, 24.4252, 23.7744, 23.0786, 22.8344, 22.0294, 21.8098, 21.0794, 20.5732, 20.1878, 19.5648, 19.2902, 18.6784, 18.3352, 17.8946, 17.3712, 17.0852, 16.499, 16.2686, 15.6844, 15.2234, 14.9732, 14.3356, 14.2286, 13.7262, 13.3284, 13.1048, 12.5962, 12.3562, 12.1272, 11.4184, 11.4974, 11.0822, 10.856, 10.48, 10.2834, 10.0208, 9.637, 9.51739999999999, 9.05759999999999, 8.74760000000001, 8.42700000000001, 8.1326, 8.2372, 8.2788, 7.6776, 7.79259999999999, 7.1952, 6.9564, 6.6454, 6.87, 6.5428, 6.19999999999999, 6.02940000000001, 5.62780000000001, 5.6782, 5.792, 5.35159999999999, 5.28319999999999, 5.0394, 5.07480000000001, 4.49119999999999, 4.84899999999999, 4.696, 4.54040000000001, 4.07300000000001, 4.37139999999999, 3.7216, 3.7328, 3.42080000000001, 3.41839999999999, 3.94239999999999, 3.27719999999999, 3.411, 3.13079999999999, 2.76900000000001, 2.92580000000001, 2.68279999999999, 2.75020000000001, 2.70599999999999, 2.3886, 3.01859999999999, 2.45179999999999, 2.92699999999999, 2.41720000000001, 2.41139999999999, 2.03299999999999, 2.51240000000001, 2.5564, 2.60079999999999, 2.41720000000001, 1.80439999999999, 1.99700000000001, 2.45480000000001, 1.8948, 2.2346, 2.30860000000001, 2.15479999999999, 1.88419999999999, 1.6508, 0.677199999999999, 1.72540000000001, 1.4752, 1.72280000000001, 1.66139999999999, 1.16759999999999, 1.79300000000001, 1.00059999999999, 0.905200000000008, 0.659999999999997, 1.55879999999999, 1.1636, 0.688199999999995, 0.712600000000009, 0.450199999999995, 1.1978, 0.975599999999986, 0.165400000000005, 1.727, 1.19739999999999, -0.252600000000001, 1.13460000000001, 1.3048, 1.19479999999999, 0.313400000000001, 0.878999999999991, 1.12039999999999, 0.853000000000009, 1.67920000000001, 0.856999999999999, 0.448599999999999, 1.2362, 0.953399999999988, 1.02859999999998, 0.563199999999995, 0.663000000000011, 0.723000000000013, 0.756599999999992, 0.256599999999992, -0.837600000000009, 0.620000000000005, 0.821599999999989, 0.216600000000028, 0.205600000000004, 0.220199999999977, 0.372599999999977, 0.334400000000016, 0.928400000000011, 0.972800000000007, 0.192400000000021, 0.487199999999973, -0.413000000000011, 0.807000000000016, 0.120600000000024, 0.769000000000005, 0.870799999999974, 0.66500000000002, 0.118200000000002, 0.401200000000017, 0.635199999999998, 0.135400000000004, 0.175599999999974, 1.16059999999999, 0.34620000000001, 0.521400000000028, -0.586599999999976, -1.16480000000001, 0.968399999999974, 0.836999999999989, 0.779600000000016, 0.985799999999983, }, // precision 7 { 91, 89.4934, 87.9758, 86.4574, 84.9718, 83.4954, 81.5302, 80.0756, 78.6374, 77.1782, 75.7888, 73.9522, 72.592, 71.2532, 69.9086, 68.5938, 66.9474, 65.6796, 64.4394, 63.2176, 61.9768, 60.4214, 59.2528, 58.0102, 56.8658, 55.7278, 54.3044, 53.1316, 52.093, 51.0032, 49.9092, 48.6306, 47.5294, 46.5756, 45.6508, 44.662, 43.552, 42.3724, 41.617, 40.5754, 39.7872, 38.8444, 37.7988, 36.8606, 36.2118, 35.3566, 34.4476, 33.5882, 32.6816, 32.0824, 31.0258, 30.6048, 29.4436, 28.7274, 27.957, 27.147, 26.4364, 25.7592, 25.3386, 24.781, 23.8028, 23.656, 22.6544, 21.996, 21.4718, 21.1544, 20.6098, 19.5956, 19.0616, 18.5758, 18.4878, 17.5244, 17.2146, 16.724, 15.8722, 15.5198, 15.0414, 14.941, 14.9048, 13.87, 13.4304, 13.028, 12.4708, 12.37, 12.0624, 11.4668, 11.5532, 11.4352, 11.2564, 10.2744, 10.2118, 9.74720000000002, 10.1456, 9.2928, 8.75040000000001, 8.55279999999999, 8.97899999999998, 8.21019999999999, 8.18340000000001, 7.3494, 7.32499999999999, 7.66140000000001, 6.90300000000002, 7.25439999999998, 6.9042, 7.21499999999997, 6.28640000000001, 6.08139999999997, 6.6764, 6.30099999999999, 5.13900000000001, 5.65800000000002, 5.17320000000001, 4.59019999999998, 4.9538, 5.08280000000002, 4.92200000000003, 4.99020000000002, 4.7328, 5.4538, 4.11360000000002, 4.22340000000003, 4.08780000000002, 3.70800000000003, 4.15559999999999, 4.18520000000001, 3.63720000000001, 3.68220000000002, 3.77960000000002, 3.6078, 2.49160000000001, 3.13099999999997, 2.5376, 3.19880000000001, 3.21100000000001, 2.4502, 3.52820000000003, 2.91199999999998, 3.04480000000001, 2.7432, 2.85239999999999, 2.79880000000003, 2.78579999999999, 1.88679999999999, 2.98860000000002, 2.50639999999999, 1.91239999999999, 2.66160000000002, 2.46820000000002, 1.58199999999999, 1.30399999999997, 2.27379999999999, 2.68939999999998, 1.32900000000001, 3.10599999999999, 1.69080000000002, 2.13740000000001, 2.53219999999999, 1.88479999999998, 1.33240000000001, 1.45119999999997, 1.17899999999997, 2.44119999999998, 1.60659999999996, 2.16700000000003, 0.77940000000001, 2.37900000000002, 2.06700000000001, 1.46000000000004, 2.91160000000002, 1.69200000000001, 0.954600000000028, 2.49300000000005, 2.2722, 1.33500000000004, 2.44899999999996, 1.20140000000004, 3.07380000000001, 2.09739999999999, 2.85640000000001, 2.29960000000005, 2.40899999999999, 1.97040000000004, 0.809799999999996, 1.65279999999996, 2.59979999999996, 0.95799999999997, 2.06799999999998, 2.32780000000002, 4.20159999999998, 1.96320000000003, 1.86400000000003, 1.42999999999995, 3.77940000000001, 1.27200000000005, 1.86440000000005, 2.20600000000002, 3.21900000000005, 1.5154, 2.61019999999996, }, // precision 8 { 183.2152, 180.2454, 177.2096, 173.6652, 170.6312, 167.6822, 164.249, 161.3296, 158.0038, 155.2074, 152.4612, 149.27, 146.5178, 143.4412, 140.8032, 138.1634, 135.1688, 132.6074, 129.6946, 127.2664, 124.8228, 122.0432, 119.6824, 116.9464, 114.6268, 112.2626, 109.8376, 107.4034, 104.8956, 102.8522, 100.7638, 98.3552, 96.3556, 93.7526, 91.9292, 89.8954, 87.8198, 85.7668, 83.298, 81.6688, 79.9466, 77.9746, 76.1672, 74.3474, 72.3028, 70.8912, 69.114, 67.4646, 65.9744, 64.4092, 62.6022, 60.843, 59.5684, 58.1652, 56.5426, 55.4152, 53.5388, 52.3592, 51.1366, 49.486, 48.3918, 46.5076, 45.509, 44.3834, 43.3498, 42.0668, 40.7346, 40.1228, 38.4528, 37.7, 36.644, 36.0518, 34.5774, 33.9068, 32.432, 32.1666, 30.434, 29.6644, 28.4894, 27.6312, 26.3804, 26.292, 25.5496000000001, 25.0234, 24.8206, 22.6146, 22.4188, 22.117, 20.6762, 20.6576, 19.7864, 19.509, 18.5334, 17.9204, 17.772, 16.2924, 16.8654, 15.1836, 15.745, 15.1316, 15.0386, 14.0136, 13.6342, 12.6196, 12.1866, 12.4281999999999, 11.3324, 10.4794000000001, 11.5038, 10.129, 9.52800000000002, 10.3203999999999, 9.46299999999997, 9.79280000000006, 9.12300000000005, 8.74180000000001, 9.2192, 7.51020000000005, 7.60659999999996, 7.01840000000004, 7.22239999999999, 7.40139999999997, 6.76179999999999, 7.14359999999999, 5.65060000000005, 5.63779999999997, 5.76599999999996, 6.75139999999999, 5.57759999999996, 3.73220000000003, 5.8048, 5.63019999999995, 4.93359999999996, 3.47979999999995, 4.33879999999999, 3.98940000000005, 3.81960000000004, 3.31359999999995, 3.23080000000004, 3.4588, 3.08159999999998, 3.4076, 3.00639999999999, 2.38779999999997, 2.61900000000003, 1.99800000000005, 3.34820000000002, 2.95060000000001, 0.990999999999985, 2.11440000000005, 2.20299999999997, 2.82219999999995, 2.73239999999998, 2.7826, 3.76660000000004, 2.26480000000004, 2.31280000000004, 2.40819999999997, 2.75360000000001, 3.33759999999995, 2.71559999999999, 1.7478000000001, 1.42920000000004, 2.39300000000003, 2.22779999999989, 2.34339999999997, 0.87259999999992, 3.88400000000001, 1.80600000000004, 1.91759999999999, 1.16779999999994, 1.50320000000011, 2.52500000000009, 0.226400000000012, 2.31500000000005, 0.930000000000064, 1.25199999999995, 2.14959999999996, 0.0407999999999902, 2.5447999999999, 1.32960000000003, 0.197400000000016, 2.52620000000002, 3.33279999999991, -1.34300000000007, 0.422199999999975, 0.917200000000093, 1.12920000000008, 1.46060000000011, 1.45779999999991, 2.8728000000001, 3.33359999999993, -1.34079999999994, 1.57680000000005, 0.363000000000056, 1.40740000000005, 0.656600000000026, 0.801400000000058, -0.454600000000028, 1.51919999999996, }, // precision 9 { 368, 361.8294, 355.2452, 348.6698, 342.1464, 336.2024, 329.8782, 323.6598, 317.462, 311.2826, 305.7102, 299.7416, 293.9366, 288.1046, 282.285, 277.0668, 271.306, 265.8448, 260.301, 254.9886, 250.2422, 244.8138, 239.7074, 234.7428, 229.8402, 225.1664, 220.3534, 215.594, 210.6886, 205.7876, 201.65, 197.228, 192.8036, 188.1666, 184.0818, 180.0824, 176.2574, 172.302, 168.1644, 164.0056, 160.3802, 156.7192, 152.5234, 149.2084, 145.831, 142.485, 139.1112, 135.4764, 131.76, 129.3368, 126.5538, 122.5058, 119.2646, 116.5902, 113.3818, 110.8998, 107.9532, 105.2062, 102.2798, 99.4728, 96.9582, 94.3292, 92.171, 89.7809999999999, 87.5716, 84.7048, 82.5322, 79.875, 78.3972, 75.3464, 73.7274, 71.2834, 70.1444, 68.4263999999999, 66.0166, 64.018, 62.0437999999999, 60.3399999999999, 58.6856, 57.9836, 55.0311999999999, 54.6769999999999, 52.3188, 51.4846, 49.4423999999999, 47.739, 46.1487999999999, 44.9202, 43.4059999999999, 42.5342000000001, 41.2834, 38.8954000000001, 38.3286000000001, 36.2146, 36.6684, 35.9946, 33.123, 33.4338, 31.7378000000001, 29.076, 28.9692, 27.4964, 27.0998, 25.9864, 26.7754, 24.3208, 23.4838, 22.7388000000001, 24.0758000000001, 21.9097999999999, 20.9728, 19.9228000000001, 19.9292, 16.617, 17.05, 18.2996000000001, 15.6128000000001, 15.7392, 14.5174, 13.6322, 12.2583999999999, 13.3766000000001, 11.423, 13.1232, 9.51639999999998, 10.5938000000001, 9.59719999999993, 8.12220000000002, 9.76739999999995, 7.50440000000003, 7.56999999999994, 6.70440000000008, 6.41419999999994, 6.71019999999999, 5.60940000000005, 4.65219999999999, 6.84099999999989, 3.4072000000001, 3.97859999999991, 3.32760000000007, 5.52160000000003, 3.31860000000006, 2.06940000000009, 4.35400000000004, 1.57500000000005, 0.280799999999999, 2.12879999999996, -0.214799999999968, -0.0378000000000611, -0.658200000000079, 0.654800000000023, -0.0697999999999865, 0.858400000000074, -2.52700000000004, -2.1751999999999, -3.35539999999992, -1.04019999999991, -0.651000000000067, -2.14439999999991, -1.96659999999997, -3.97939999999994, -0.604400000000169, -3.08260000000018, -3.39159999999993, -5.29640000000018, -5.38920000000007, -5.08759999999984, -4.69900000000007, -5.23720000000003, -3.15779999999995, -4.97879999999986, -4.89899999999989, -7.48880000000008, -5.94799999999987, -5.68060000000014, -6.67180000000008, -4.70499999999993, -7.27779999999984, -4.6579999999999, -4.4362000000001, -4.32139999999981, -5.18859999999995, -6.66879999999992, -6.48399999999992, -5.1260000000002, -4.4032000000002, -6.13500000000022, -5.80819999999994, -4.16719999999987, -4.15039999999999, -7.45600000000013, -7.24080000000004, -9.83179999999993, -5.80420000000004, -8.6561999999999, -6.99940000000015, -10.5473999999999, -7.34139999999979, -6.80999999999995, -6.29719999999998, -6.23199999999997, }, // precision 10 { 737.1256, 724.4234, 711.1064, 698.4732, 685.4636, 673.0644, 660.488, 647.9654, 636.0832, 623.7864, 612.1992, 600.2176, 588.5228, 577.1716, 565.7752, 554.899, 543.6126, 532.6492, 521.9474, 511.5214, 501.1064, 490.6364, 480.2468, 470.4588, 460.3832, 451.0584, 440.8606, 431.3868, 422.5062, 413.1862, 404.463, 395.339, 386.1936, 378.1292, 369.1854, 361.2908, 353.3324, 344.8518, 337.5204, 329.4854, 321.9318, 314.552, 306.4658, 299.4256, 292.849, 286.152, 278.8956, 271.8792, 265.118, 258.62, 252.5132, 245.9322, 239.7726, 233.6086, 227.5332, 222.5918, 216.4294, 210.7662, 205.4106, 199.7338, 194.9012, 188.4486, 183.1556, 178.6338, 173.7312, 169.6264, 163.9526, 159.8742, 155.8326, 151.1966, 147.5594, 143.07, 140.037, 134.1804, 131.071, 127.4884, 124.0848, 120.2944, 117.333, 112.9626, 110.2902, 107.0814, 103.0334, 99.4832000000001, 96.3899999999999, 93.7202000000002, 90.1714000000002, 87.2357999999999, 85.9346, 82.8910000000001, 80.0264000000002, 78.3834000000002, 75.1543999999999, 73.8683999999998, 70.9895999999999, 69.4367999999999, 64.8701999999998, 65.0408000000002, 61.6738, 59.5207999999998, 57.0158000000001, 54.2302, 53.0962, 50.4985999999999, 52.2588000000001, 47.3914, 45.6244000000002, 42.8377999999998, 43.0072, 40.6516000000001, 40.2453999999998, 35.2136, 36.4546, 33.7849999999999, 33.2294000000002, 32.4679999999998, 30.8670000000002, 28.6507999999999, 28.9099999999999, 27.5983999999999, 26.1619999999998, 24.5563999999999, 23.2328000000002, 21.9484000000002, 21.5902000000001, 21.3346000000001, 17.7031999999999, 20.6111999999998, 19.5545999999999, 15.7375999999999, 17.0720000000001, 16.9517999999998, 15.326, 13.1817999999998, 14.6925999999999, 13.0859999999998, 13.2754, 10.8697999999999, 11.248, 7.3768, 4.72339999999986, 7.97899999999981, 8.7503999999999, 7.68119999999999, 9.7199999999998, 7.73919999999998, 5.6224000000002, 7.44560000000001, 6.6601999999998, 5.9058, 4.00199999999995, 4.51699999999983, 4.68240000000014, 3.86220000000003, 5.13639999999987, 5.98500000000013, 2.47719999999981, 2.61999999999989, 1.62800000000016, 4.65000000000009, 0.225599999999758, 0.831000000000131, -0.359400000000278, 1.27599999999984, -2.92559999999958, -0.0303999999996449, 2.37079999999969, -2.0033999999996, 0.804600000000391, 0.30199999999968, 1.1247999999996, -2.6880000000001, 0.0321999999996478, -1.18099999999959, -3.9402, -1.47940000000017, -0.188400000000001, -2.10720000000038, -2.04159999999956, -3.12880000000041, -4.16160000000036, -0.612799999999879, -3.48719999999958, -8.17900000000009, -5.37780000000021, -4.01379999999972, -5.58259999999973, -5.73719999999958, -7.66799999999967, -5.69520000000011, -1.1247999999996, -5.58520000000044, -8.04560000000038, -4.64840000000004, -11.6468000000004, -7.97519999999986, -5.78300000000036, -7.67420000000038, -10.6328000000003, -9.81720000000041, }, // precision 11 { 1476, 1449.6014, 1423.5802, 1397.7942, 1372.3042, 1347.2062, 1321.8402, 1297.2292, 1272.9462, 1248.9926, 1225.3026, 1201.4252, 1178.0578, 1155.6092, 1132.626, 1110.5568, 1088.527, 1066.5154, 1045.1874, 1024.3878, 1003.37, 982.1972, 962.5728, 942.1012, 922.9668, 903.292, 884.0772, 864.8578, 846.6562, 828.041, 809.714, 792.3112, 775.1806, 757.9854, 740.656, 724.346, 707.5154, 691.8378, 675.7448, 659.6722, 645.5722, 630.1462, 614.4124, 600.8728, 585.898, 572.408, 558.4926, 544.4938, 531.6776, 517.282, 505.7704, 493.1012, 480.7388, 467.6876, 456.1872, 445.5048, 433.0214, 420.806, 411.409, 400.4144, 389.4294, 379.2286, 369.651, 360.6156, 350.337, 342.083, 332.1538, 322.5094, 315.01, 305.6686, 298.1678, 287.8116, 280.9978, 271.9204, 265.3286, 257.5706, 249.6014, 242.544, 235.5976, 229.583, 220.9438, 214.672, 208.2786, 201.8628, 195.1834, 191.505, 186.1816, 178.5188, 172.2294, 167.8908, 161.0194, 158.052, 151.4588, 148.1596, 143.4344, 138.5238, 133.13, 127.6374, 124.8162, 118.7894, 117.3984, 114.6078, 109.0858, 105.1036, 103.6258, 98.6018000000004, 95.7618000000002, 93.5821999999998, 88.5900000000001, 86.9992000000002, 82.8800000000001, 80.4539999999997, 74.6981999999998, 74.3644000000004, 73.2914000000001, 65.5709999999999, 66.9232000000002, 65.1913999999997, 62.5882000000001, 61.5702000000001, 55.7035999999998, 56.1764000000003, 52.7596000000003, 53.0302000000001, 49.0609999999997, 48.4694, 44.933, 46.0474000000004, 44.7165999999997, 41.9416000000001, 39.9207999999999, 35.6328000000003, 35.5276000000003, 33.1934000000001, 33.2371999999996, 33.3864000000003, 33.9228000000003, 30.2371999999996, 29.1373999999996, 25.2272000000003, 24.2942000000003, 19.8338000000003, 18.9005999999999, 23.0907999999999, 21.8544000000002, 19.5176000000001, 15.4147999999996, 16.9314000000004, 18.6737999999996, 12.9877999999999, 14.3688000000002, 12.0447999999997, 15.5219999999999, 12.5299999999997, 14.5940000000001, 14.3131999999996, 9.45499999999993, 12.9441999999999, 3.91139999999996, 13.1373999999996, 5.44720000000052, 9.82779999999912, 7.87279999999919, 3.67760000000089, 5.46980000000076, 5.55099999999948, 5.65979999999945, 3.89439999999922, 3.1275999999998, 5.65140000000065, 6.3062000000009, 3.90799999999945, 1.87060000000019, 5.17020000000048, 2.46680000000015, 0.770000000000437, -3.72340000000077, 1.16400000000067, 8.05340000000069, 0.135399999999208, 2.15940000000046, 0.766999999999825, 1.0594000000001, 3.15500000000065, -0.287399999999252, 2.37219999999979, -2.86620000000039, -1.63199999999961, -2.22979999999916, -0.15519999999924, -1.46039999999994, -0.262199999999211, -2.34460000000036, -2.8078000000005, -3.22179999999935, -5.60159999999996, -8.42200000000048, -9.43740000000071, 0.161799999999857, -10.4755999999998, -10.0823999999993, }, // precision 12 { 2953, 2900.4782, 2848.3568, 2796.3666, 2745.324, 2694.9598, 2644.648, 2595.539, 2546.1474, 2498.2576, 2450.8376, 2403.6076, 2357.451, 2311.38, 2266.4104, 2221.5638, 2176.9676, 2134.193, 2090.838, 2048.8548, 2007.018, 1966.1742, 1925.4482, 1885.1294, 1846.4776, 1807.4044, 1768.8724, 1731.3732, 1693.4304, 1657.5326, 1621.949, 1586.5532, 1551.7256, 1517.6182, 1483.5186, 1450.4528, 1417.865, 1385.7164, 1352.6828, 1322.6708, 1291.8312, 1260.9036, 1231.476, 1201.8652, 1173.6718, 1145.757, 1119.2072, 1092.2828, 1065.0434, 1038.6264, 1014.3192, 988.5746, 965.0816, 940.1176, 917.9796, 894.5576, 871.1858, 849.9144, 827.1142, 805.0818, 783.9664, 763.9096, 742.0816, 724.3962, 706.3454, 688.018, 667.4214, 650.3106, 633.0686, 613.8094, 597.818, 581.4248, 563.834, 547.363, 531.5066, 520.455400000001, 505.583199999999, 488.366, 476.480799999999, 459.7682, 450.0522, 434.328799999999, 423.952799999999, 408.727000000001, 399.079400000001, 387.252200000001, 373.987999999999, 360.852000000001, 351.6394, 339.642, 330.902400000001, 322.661599999999, 311.662200000001, 301.3254, 291.7484, 279.939200000001, 276.7508, 263.215200000001, 254.811400000001, 245.5494, 242.306399999999, 234.8734, 223.787200000001, 217.7156, 212.0196, 200.793, 195.9748, 189.0702, 182.449199999999, 177.2772, 170.2336, 164.741, 158.613600000001, 155.311, 147.5964, 142.837, 137.3724, 132.0162, 130.0424, 121.9804, 120.451800000001, 114.8968, 111.585999999999, 105.933199999999, 101.705, 98.5141999999996, 95.0488000000005, 89.7880000000005, 91.4750000000004, 83.7764000000006, 80.9698000000008, 72.8574000000008, 73.1615999999995, 67.5838000000003, 62.6263999999992, 63.2638000000006, 66.0977999999996, 52.0843999999997, 58.9956000000002, 47.0912000000008, 46.4956000000002, 48.4383999999991, 47.1082000000006, 43.2392, 37.2759999999998, 40.0283999999992, 35.1864000000005, 35.8595999999998, 32.0998, 28.027, 23.6694000000007, 33.8266000000003, 26.3736000000008, 27.2008000000005, 21.3245999999999, 26.4115999999995, 23.4521999999997, 19.5013999999992, 19.8513999999996, 10.7492000000002, 18.6424000000006, 13.1265999999996, 18.2436000000016, 6.71860000000015, 3.39459999999963, 6.33759999999893, 7.76719999999841, 0.813999999998487, 3.82819999999992, 0.826199999999517, 8.07440000000133, -1.59080000000176, 5.01780000000144, 0.455399999998917, -0.24199999999837, 0.174800000000687, -9.07640000000174, -4.20160000000033, -3.77520000000004, -4.75179999999818, -5.3724000000002, -8.90680000000066, -6.10239999999976, -5.74120000000039, -9.95339999999851, -3.86339999999836, -13.7304000000004, -16.2710000000006, -7.51359999999841, -3.30679999999847, -13.1339999999982, -10.0551999999989, -6.72019999999975, -8.59660000000076, -10.9307999999983, -1.8775999999998, -4.82259999999951, -13.7788, -21.6470000000008, -10.6735999999983, -15.7799999999988, }, // precision 13 { 5907.5052, 5802.2672, 5697.347, 5593.5794, 5491.2622, 5390.5514, 5290.3376, 5191.6952, 5093.5988, 4997.3552, 4902.5972, 4808.3082, 4715.5646, 4624.109, 4533.8216, 4444.4344, 4356.3802, 4269.2962, 4183.3784, 4098.292, 4014.79, 3932.4574, 3850.6036, 3771.2712, 3691.7708, 3615.099, 3538.1858, 3463.4746, 3388.8496, 3315.6794, 3244.5448, 3173.7516, 3103.3106, 3033.6094, 2966.5642, 2900.794, 2833.7256, 2769.81, 2707.3196, 2644.0778, 2583.9916, 2523.4662, 2464.124, 2406.073, 2347.0362, 2292.1006, 2238.1716, 2182.7514, 2128.4884, 2077.1314, 2025.037, 1975.3756, 1928.933, 1879.311, 1831.0006, 1783.2144, 1738.3096, 1694.5144, 1649.024, 1606.847, 1564.7528, 1525.3168, 1482.5372, 1443.9668, 1406.5074, 1365.867, 1329.2186, 1295.4186, 1257.9716, 1225.339, 1193.2972, 1156.3578, 1125.8686, 1091.187, 1061.4094, 1029.4188, 1000.9126, 972.3272, 944.004199999999, 915.7592, 889.965, 862.834200000001, 840.4254, 812.598399999999, 785.924200000001, 763.050999999999, 741.793799999999, 721.466, 699.040799999999, 677.997200000002, 649.866999999998, 634.911800000002, 609.8694, 591.981599999999, 570.2922, 557.129199999999, 538.3858, 521.872599999999, 502.951400000002, 495.776399999999, 475.171399999999, 459.751, 439.995200000001, 426.708999999999, 413.7016, 402.3868, 387.262599999998, 372.0524, 357.050999999999, 342.5098, 334.849200000001, 322.529399999999, 311.613799999999, 295.848000000002, 289.273000000001, 274.093000000001, 263.329600000001, 251.389599999999, 245.7392, 231.9614, 229.7952, 217.155200000001, 208.9588, 199.016599999999, 190.839199999999, 180.6976, 176.272799999999, 166.976999999999, 162.5252, 151.196400000001, 149.386999999999, 133.981199999998, 130.0586, 130.164000000001, 122.053400000001, 110.7428, 108.1276, 106.232400000001, 100.381600000001, 98.7668000000012, 86.6440000000002, 79.9768000000004, 82.4722000000002, 68.7026000000005, 70.1186000000016, 71.9948000000004, 58.998599999999, 59.0492000000013, 56.9818000000014, 47.5338000000011, 42.9928, 51.1591999999982, 37.2740000000013, 42.7220000000016, 31.3734000000004, 26.8090000000011, 25.8934000000008, 26.5286000000015, 29.5442000000003, 19.3503999999994, 26.0760000000009, 17.9527999999991, 14.8419999999969, 10.4683999999979, 8.65899999999965, 9.86720000000059, 4.34139999999752, -0.907800000000861, -3.32080000000133, -0.936199999996461, -11.9916000000012, -8.87000000000262, -6.33099999999831, -11.3366000000024, -15.9207999999999, -9.34659999999712, -15.5034000000014, -19.2097999999969, -15.357799999998, -28.2235999999975, -30.6898000000001, -19.3271999999997, -25.6083999999973, -24.409599999999, -13.6385999999984, -33.4473999999973, -32.6949999999997, -28.9063999999998, -31.7483999999968, -32.2935999999972, -35.8329999999987, -47.620600000002, -39.0855999999985, -33.1434000000008, -46.1371999999974, -37.5892000000022, -46.8164000000033, -47.3142000000007, -60.2914000000019, -37.7575999999972, }, // precision 14 { 11816.475, 11605.0046, 11395.3792, 11188.7504, 10984.1814, 10782.0086, 10582.0072, 10384.503, 10189.178, 9996.2738, 9806.0344, 9617.9798, 9431.394, 9248.7784, 9067.6894, 8889.6824, 8712.9134, 8538.8624, 8368.4944, 8197.7956, 8031.8916, 7866.6316, 7703.733, 7544.5726, 7386.204, 7230.666, 7077.8516, 6926.7886, 6778.6902, 6631.9632, 6487.304, 6346.7486, 6206.4408, 6070.202, 5935.2576, 5799.924, 5671.0324, 5541.9788, 5414.6112, 5290.0274, 5166.723, 5047.6906, 4929.162, 4815.1406, 4699.127, 4588.5606, 4477.7394, 4369.4014, 4264.2728, 4155.9224, 4055.581, 3955.505, 3856.9618, 3761.3828, 3666.9702, 3575.7764, 3482.4132, 3395.0186, 3305.8852, 3221.415, 3138.6024, 3056.296, 2970.4494, 2896.1526, 2816.8008, 2740.2156, 2670.497, 2594.1458, 2527.111, 2460.8168, 2387.5114, 2322.9498, 2260.6752, 2194.2686, 2133.7792, 2074.767, 2015.204, 1959.4226, 1898.6502, 1850.006, 1792.849, 1741.4838, 1687.9778, 1638.1322, 1589.3266, 1543.1394, 1496.8266, 1447.8516, 1402.7354, 1361.9606, 1327.0692, 1285.4106, 1241.8112, 1201.6726, 1161.973, 1130.261, 1094.2036, 1048.2036, 1020.6436, 990.901400000002, 961.199800000002, 924.769800000002, 899.526400000002, 872.346400000002, 834.375, 810.432000000001, 780.659800000001, 756.013800000001, 733.479399999997, 707.923999999999, 673.858, 652.222399999999, 636.572399999997, 615.738599999997, 586.696400000001, 564.147199999999, 541.679600000003, 523.943599999999, 505.714599999999, 475.729599999999, 461.779600000002, 449.750800000002, 439.020799999998, 412.7886, 400.245600000002, 383.188199999997, 362.079599999997, 357.533799999997, 334.319000000003, 327.553399999997, 308.559399999998, 291.270199999999, 279.351999999999, 271.791400000002, 252.576999999997, 247.482400000001, 236.174800000001, 218.774599999997, 220.155200000001, 208.794399999999, 201.223599999998, 182.995600000002, 185.5268, 164.547400000003, 176.5962, 150.689599999998, 157.8004, 138.378799999999, 134.021200000003, 117.614399999999, 108.194000000003, 97.0696000000025, 89.6042000000016, 95.6030000000028, 84.7810000000027, 72.635000000002, 77.3482000000004, 59.4907999999996, 55.5875999999989, 50.7346000000034, 61.3916000000027, 50.9149999999936, 39.0384000000049, 58.9395999999979, 29.633600000001, 28.2032000000036, 26.0078000000067, 17.0387999999948, 9.22000000000116, 13.8387999999977, 8.07240000000456, 14.1549999999988, 15.3570000000036, 3.42660000000615, 6.24820000000182, -2.96940000000177, -8.79940000000352, -5.97860000000219, -14.4048000000039, -3.4143999999942, -13.0148000000045, -11.6977999999945, -25.7878000000055, -22.3185999999987, -24.409599999999, -31.9756000000052, -18.9722000000038, -22.8678000000073, -30.8972000000067, -32.3715999999986, -22.3907999999938, -43.6720000000059, -35.9038, -39.7492000000057, -54.1641999999993, -45.2749999999942, -42.2989999999991, -44.1089999999967, -64.3564000000042, -49.9551999999967, -42.6116000000038, }, // precision 15 { 23634.0036, 23210.8034, 22792.4744, 22379.1524, 21969.7928, 21565.326, 21165.3532, 20770.2806, 20379.9892, 19994.7098, 19613.318, 19236.799, 18865.4382, 18498.8244, 18136.5138, 17778.8668, 17426.2344, 17079.32, 16734.778, 16397.2418, 16063.3324, 15734.0232, 15409.731, 15088.728, 14772.9896, 14464.1402, 14157.5588, 13855.5958, 13559.3296, 13264.9096, 12978.326, 12692.0826, 12413.8816, 12137.3192, 11870.2326, 11602.5554, 11340.3142, 11079.613, 10829.5908, 10583.5466, 10334.0344, 10095.5072, 9859.694, 9625.2822, 9395.7862, 9174.0586, 8957.3164, 8738.064, 8524.155, 8313.7396, 8116.9168, 7913.542, 7718.4778, 7521.65, 7335.5596, 7154.2906, 6968.7396, 6786.3996, 6613.236, 6437.406, 6270.6598, 6107.7958, 5945.7174, 5787.6784, 5635.5784, 5482.308, 5337.9784, 5190.0864, 5045.9158, 4919.1386, 4771.817, 4645.7742, 4518.4774, 4385.5454, 4262.6622, 4142.74679999999, 4015.5318, 3897.9276, 3790.7764, 3685.13800000001, 3573.6274, 3467.9706, 3368.61079999999, 3271.5202, 3170.3848, 3076.4656, 2982.38400000001, 2888.4664, 2806.4868, 2711.9564, 2634.1434, 2551.3204, 2469.7662, 2396.61139999999, 2318.9902, 2243.8658, 2171.9246, 2105.01360000001, 2028.8536, 1960.9952, 1901.4096, 1841.86079999999, 1777.54700000001, 1714.5802, 1654.65059999999, 1596.311, 1546.2016, 1492.3296, 1433.8974, 1383.84600000001, 1339.4152, 1293.5518, 1245.8686, 1193.50659999999, 1162.27959999999, 1107.19439999999, 1069.18060000001, 1035.09179999999, 999.679000000004, 957.679999999993, 925.300199999998, 888.099400000006, 848.638600000006, 818.156400000007, 796.748399999997, 752.139200000005, 725.271200000003, 692.216, 671.633600000001, 647.939799999993, 621.670599999998, 575.398799999995, 561.226599999995, 532.237999999998, 521.787599999996, 483.095799999996, 467.049599999998, 465.286399999997, 415.548599999995, 401.047399999996, 380.607999999993, 377.362599999993, 347.258799999996, 338.371599999999, 310.096999999994, 301.409199999995, 276.280799999993, 265.586800000005, 258.994399999996, 223.915999999997, 215.925399999993, 213.503800000006, 191.045400000003, 166.718200000003, 166.259000000005, 162.941200000001, 148.829400000002, 141.645999999993, 123.535399999993, 122.329800000007, 89.473399999988, 80.1962000000058, 77.5457999999926, 59.1056000000099, 83.3509999999951, 52.2906000000075, 36.3979999999865, 40.6558000000077, 42.0003999999899, 19.6630000000005, 19.7153999999864, -8.38539999999921, -0.692799999989802, 0.854800000000978, 3.23219999999856, -3.89040000000386, -5.25880000001052, -24.9052000000083, -22.6837999999989, -26.4286000000138, -34.997000000003, -37.0216000000073, -43.430400000012, -58.2390000000014, -68.8034000000043, -56.9245999999985, -57.8583999999973, -77.3097999999882, -73.2793999999994, -81.0738000000129, -87.4530000000086, -65.0254000000132, -57.296399999992, -96.2746000000043, -103.25, -96.081600000005, -91.5542000000132, -102.465200000006, -107.688599999994, -101.458000000013, -109.715800000005, }, // precision 16 { 47270, 46423.3584, 45585.7074, 44757.152, 43938.8416, 43130.9514, 42330.03, 41540.407, 40759.6348, 39988.206, 39226.5144, 38473.2096, 37729.795, 36997.268, 36272.6448, 35558.665, 34853.0248, 34157.4472, 33470.5204, 32793.5742, 32127.0194, 31469.4182, 30817.6136, 30178.6968, 29546.8908, 28922.8544, 28312.271, 27707.0924, 27114.0326, 26526.692, 25948.6336, 25383.7826, 24823.5998, 24272.2974, 23732.2572, 23201.4976, 22674.2796, 22163.6336, 21656.515, 21161.7362, 20669.9368, 20189.4424, 19717.3358, 19256.3744, 18795.9638, 18352.197, 17908.5738, 17474.391, 17052.918, 16637.2236, 16228.4602, 15823.3474, 15428.6974, 15043.0284, 14667.6278, 14297.4588, 13935.2882, 13578.5402, 13234.6032, 12882.1578, 12548.0728, 12219.231, 11898.0072, 11587.2626, 11279.9072, 10973.5048, 10678.5186, 10392.4876, 10105.2556, 9825.766, 9562.5444, 9294.2222, 9038.2352, 8784.848, 8533.2644, 8301.7776, 8058.30859999999, 7822.94579999999, 7599.11319999999, 7366.90779999999, 7161.217, 6957.53080000001, 6736.212, 6548.21220000001, 6343.06839999999, 6156.28719999999, 5975.15419999999, 5791.75719999999, 5621.32019999999, 5451.66, 5287.61040000001, 5118.09479999999, 4957.288, 4798.4246, 4662.17559999999, 4512.05900000001, 4364.68539999999, 4220.77720000001, 4082.67259999999, 3957.19519999999, 3842.15779999999, 3699.3328, 3583.01180000001, 3473.8964, 3338.66639999999, 3233.55559999999, 3117.799, 3008.111, 2909.69140000001, 2814.86499999999, 2719.46119999999, 2624.742, 2532.46979999999, 2444.7886, 2370.1868, 2272.45259999999, 2196.19260000001, 2117.90419999999, 2023.2972, 1969.76819999999, 1885.58979999999, 1833.2824, 1733.91200000001, 1682.54920000001, 1604.57980000001, 1556.11240000001, 1491.3064, 1421.71960000001, 1371.22899999999, 1322.1324, 1264.7892, 1196.23920000001, 1143.8474, 1088.67240000001, 1073.60380000001, 1023.11660000001, 959.036400000012, 927.433199999999, 906.792799999996, 853.433599999989, 841.873800000001, 791.1054, 756.899999999994, 704.343200000003, 672.495599999995, 622.790399999998, 611.254799999995, 567.283200000005, 519.406599999988, 519.188400000014, 495.312800000014, 451.350799999986, 443.973399999988, 431.882199999993, 392.027000000002, 380.924200000009, 345.128999999986, 298.901400000002, 287.771999999997, 272.625, 247.253000000026, 222.490600000019, 223.590000000026, 196.407599999977, 176.425999999978, 134.725199999986, 132.4804, 110.445599999977, 86.7939999999944, 56.7038000000175, 64.915399999998, 38.3726000000024, 37.1606000000029, 46.170999999973, 49.1716000000015, 15.3362000000197, 6.71639999997569, -34.8185999999987, -39.4476000000141, 12.6830000000191, -12.3331999999937, -50.6565999999875, -59.9538000000175, -65.1054000000004, -70.7576000000117, -106.325200000021, -126.852200000023, -110.227599999984, -132.885999999999, -113.897200000007, -142.713800000027, -151.145399999979, -150.799200000009, -177.756200000003, -156.036399999983, -182.735199999996, -177.259399999981, -198.663600000029, -174.577600000019, -193.84580000001, }, // precision 17 { 94541, 92848.811, 91174.019, 89517.558, 87879.9705, 86262.7565, 84663.5125, 83083.7435, 81521.7865, 79977.272, 78455.9465, 76950.219, 75465.432, 73994.152, 72546.71, 71115.2345, 69705.6765, 68314.937, 66944.2705, 65591.255, 64252.9485, 62938.016, 61636.8225, 60355.592, 59092.789, 57850.568, 56624.518, 55417.343, 54231.1415, 53067.387, 51903.526, 50774.649, 49657.6415, 48561.05, 47475.7575, 46410.159, 45364.852, 44327.053, 43318.4005, 42325.6165, 41348.4595, 40383.6265, 39436.77, 38509.502, 37594.035, 36695.939, 35818.6895, 34955.691, 34115.8095, 33293.949, 32465.0775, 31657.6715, 30877.2585, 30093.78, 29351.3695, 28594.1365, 27872.115, 27168.7465, 26477.076, 25774.541, 25106.5375, 24452.5135, 23815.5125, 23174.0655, 22555.2685, 21960.2065, 21376.3555, 20785.1925, 20211.517, 19657.0725, 19141.6865, 18579.737, 18081.3955, 17578.995, 17073.44, 16608.335, 16119.911, 15651.266, 15194.583, 14749.0495, 14343.4835, 13925.639, 13504.509, 13099.3885, 12691.2855, 12328.018, 11969.0345, 11596.5145, 11245.6355, 10917.6575, 10580.9785, 10277.8605, 9926.58100000001, 9605.538, 9300.42950000003, 8989.97850000003, 8728.73249999998, 8448.3235, 8175.31050000002, 7898.98700000002, 7629.79100000003, 7413.76199999999, 7149.92300000001, 6921.12650000001, 6677.1545, 6443.28000000003, 6278.23450000002, 6014.20049999998, 5791.20299999998, 5605.78450000001, 5438.48800000001, 5234.2255, 5059.6825, 4887.43349999998, 4682.935, 4496.31099999999, 4322.52250000002, 4191.42499999999, 4021.24200000003, 3900.64799999999, 3762.84250000003, 3609.98050000001, 3502.29599999997, 3363.84250000003, 3206.54849999998, 3079.70000000001, 2971.42300000001, 2867.80349999998, 2727.08100000001, 2630.74900000001, 2496.6165, 2440.902, 2356.19150000002, 2235.58199999999, 2120.54149999999, 2012.25449999998, 1933.35600000003, 1820.93099999998, 1761.54800000001, 1663.09350000002, 1578.84600000002, 1509.48149999999, 1427.3345, 1379.56150000001, 1306.68099999998, 1212.63449999999, 1084.17300000001, 1124.16450000001, 1060.69949999999, 1007.48849999998, 941.194499999983, 879.880500000028, 836.007500000007, 782.802000000025, 748.385499999975, 647.991500000004, 626.730500000005, 570.776000000013, 484.000500000024, 513.98550000001, 418.985499999952, 386.996999999974, 370.026500000036, 355.496999999974, 356.731499999994, 255.92200000002, 259.094000000041, 205.434499999974, 165.374500000034, 197.347500000033, 95.718499999959, 67.6165000000037, 54.6970000000438, 31.7395000000251, -15.8784999999916, 8.42500000004657, -26.3754999999655, -118.425500000012, -66.6629999999423, -42.9745000000112, -107.364999999991, -189.839000000036, -162.611499999999, -164.964999999967, -189.079999999958, -223.931499999948, -235.329999999958, -269.639500000048, -249.087999999989, -206.475499999942, -283.04449999996, -290.667000000016, -304.561499999953, -336.784499999951, -380.386500000022, -283.280499999993, -364.533000000054, -389.059499999974, -364.454000000027, -415.748000000021, -417.155000000028, }, // precision 18 { 189083, 185696.913, 182348.774, 179035.946, 175762.762, 172526.444, 169329.754, 166166.099, 163043.269, 159958.91, 156907.912, 153906.845, 150924.199, 147996.568, 145093.457, 142239.233, 139421.475, 136632.27, 133889.588, 131174.2, 128511.619, 125868.621, 123265.385, 120721.061, 118181.769, 115709.456, 113252.446, 110840.198, 108465.099, 106126.164, 103823.469, 101556.618, 99308.004, 97124.508, 94937.803, 92833.731, 90745.061, 88677.627, 86617.47, 84650.442, 82697.833, 80769.132, 78879.629, 77014.432, 75215.626, 73384.587, 71652.482, 69895.93, 68209.301, 66553.669, 64921.981, 63310.323, 61742.115, 60205.018, 58698.658, 57190.657, 55760.865, 54331.169, 52908.167, 51550.273, 50225.254, 48922.421, 47614.533, 46362.049, 45098.569, 43926.083, 42736.03, 41593.473, 40425.26, 39316.237, 38243.651, 37170.617, 36114.609, 35084.19, 34117.233, 33206.509, 32231.505, 31318.728, 30403.404, 29540.0550000001, 28679.236, 27825.862, 26965.216, 26179.148, 25462.08, 24645.952, 23922.523, 23198.144, 22529.128, 21762.4179999999, 21134.779, 20459.117, 19840.818, 19187.04, 18636.3689999999, 17982.831, 17439.7389999999, 16874.547, 16358.2169999999, 15835.684, 15352.914, 14823.681, 14329.313, 13816.897, 13342.874, 12880.882, 12491.648, 12021.254, 11625.392, 11293.7610000001, 10813.697, 10456.209, 10099.074, 9755.39000000001, 9393.18500000006, 9047.57900000003, 8657.98499999999, 8395.85900000005, 8033, 7736.95900000003, 7430.59699999995, 7258.47699999996, 6924.58200000005, 6691.29399999999, 6357.92500000005, 6202.05700000003, 5921.19700000004, 5628.28399999999, 5404.96799999999, 5226.71100000001, 4990.75600000005, 4799.77399999998, 4622.93099999998, 4472.478, 4171.78700000001, 3957.46299999999, 3868.95200000005, 3691.14300000004, 3474.63100000005, 3341.67200000002, 3109.14000000001, 3071.97400000005, 2796.40399999998, 2756.17799999996, 2611.46999999997, 2471.93000000005, 2382.26399999997, 2209.22400000005, 2142.28399999999, 2013.96100000001, 1911.18999999994, 1818.27099999995, 1668.47900000005, 1519.65800000005, 1469.67599999998, 1367.13800000004, 1248.52899999998, 1181.23600000003, 1022.71900000004, 1088.20700000005, 959.03600000008, 876.095999999903, 791.183999999892, 703.337000000058, 731.949999999953, 586.86400000006, 526.024999999907, 323.004999999888, 320.448000000091, 340.672999999952, 309.638999999966, 216.601999999955, 102.922999999952, 19.2399999999907, -0.114000000059605, -32.6240000000689, -89.3179999999702, -153.497999999905, -64.2970000000205, -143.695999999996, -259.497999999905, -253.017999999924, -213.948000000091, -397.590000000084, -434.006000000052, -403.475000000093, -297.958000000101, -404.317000000039, -528.898999999976, -506.621000000043, -513.205000000075, -479.351000000024, -596.139999999898, -527.016999999993, -664.681000000099, -680.306000000099, -704.050000000047, -850.486000000034, -757.43200000003, -713.308999999892, } }; #endif /* HYPERLOGLOGBIAS_H_ */ kraken2-2.1.2/src/hyperloglogplus.cc000066400000000000000000000664211404614725400174260ustar00rootroot00000000000000/* * Copyright 2017-2018, Florian Breitwieser * * This file was originally developed for the KrakenUniq taxonomic classification system. */ #include "hyperloglogplus.h" #include #include #include #include #include #include //log and sqrt #include //vector.count #include #include //accummulate #include "hyperloglogplus-bias.h" ///////////////////////////////////////////////////////////////////// // Helper methods for bit operations // functions for counting the number of leading 0-bits (clz) // and counting the number of trailing 0-bits (ctz) //#ifdef __GNUC__ // TODO: switch between builtin clz and 64_clz based on architecture //#define clz(x) __builtin_clz(x) #if 0 static int clz_manual(uint64_t x) { // This uses a binary search (counting down) algorithm from Hacker's Delight. uint64_t y; int n = 64; y = x >>32; if (y != 0) {n -= 32; x = y;} y = x >>16; if (y != 0) {n -= 16; x = y;} y = x >> 8; if (y != 0) {n -= 8; x = y;} y = x >> 4; if (y != 0) {n -= 4; x = y;} y = x >> 2; if (y != 0) {n -= 2; x = y;} y = x >> 1; if (y != 0) return n - 2; return n - x; } #endif #ifdef WIN32 #include #define __builtin_clz(x) __lzcnt(x) #define __builtin_clzl(x) __lzcnt64(x) #endif inline uint8_t clz(const uint32_t x, const uint8_t max = 32) { if (x == 0) { return max; } return __builtin_clz(x); } inline uint8_t clz(const uint64_t x, const uint8_t max = 64) { if (x == 0) { return max; } return __builtin_clzl(x); } template inline uint8_t clz_p(const T x, const uint8_t p = 1) { return __builtin_clz(((x<<1)|1)<<(p-1)); } /** * Extract bits (from uint32_t or uint64_t) using LSB 0 numbering from hi to lo, including lo */ template T extractBits(T value, uint8_t hi, uint8_t lo, bool shift_left = false) { // create a bitmask: // (T(1) << (hi - lo) a 1 at the position (hi - lo) // ((T(1) << (hi - lo) - 1) 1's from position 0 to position (hi-lo-1) // (((T(1) << (hi - lo)) - 1) << lo) 1's from position lo to position hi // The T(1) is required to not cause overflow on 32bit machines // TODO: consider creating a bitmask only once in the beginning T bitmask = (((T(1) << (hi - lo)) - 1) << lo); T result = value & bitmask; if (!shift_left) { // shift resulting bits to the right result = result >> lo; } else { // shift resulting bits to the left result = result << (sizeof(T)*8 - hi); } return result; } inline uint64_t extractHighBits(uint64_t bits, uint8_t hi) { return bits >> (64u - hi); } inline uint32_t extractHighBits(uint32_t bits, uint8_t hi) { return bits >> (32u - hi); } inline uint32_t getIndex(const uint64_t hash_value, const uint8_t p) { // take first p bits as index {x63,...,x64-p} return hash_value >> (64 - p); } inline uint32_t getIndex(const uint32_t hash_value, const uint8_t p) { // take first p bits as index {x31,...,x32-p} return hash_value >> (32 - p); } template inline T trailingOnes(const uint8_t p) { return (T(1) << p ) - 1; } uint8_t getRank(const uint32_t hash_value, const uint8_t p) { // shift p values off, and count leading zeros of the remaining string {x31-p,...,x0} uint32_t rank_bits (hash_value << p); uint8_t rank_val = clz(rank_bits, 32-p) + 1; //uint8_t rank_val = clz_p(hash_value, p) + 1; assert(rank_val < 32-p+1); return rank_val; } uint8_t getRank(const uint64_t hash_value, const uint8_t p) { // shift p values off, and count leading zeros of the remaining string {x63-p,...,x0} uint64_t rank_bits (hash_value << p); uint8_t rank_val = clz(rank_bits, 64-p) + 1; //uint8_t rank_val = clz_p(hash_value, p) + 1; assert(rank_val <= 64-p+1); return rank_val; } ///////////////////////////////////////////////////////////////////// // Methods for spare representation, Heule et al., 2015 uint8_t getEncodedRank(const uint32_t encoded_hash_value, const uint8_t pPrime, const uint8_t p) { // check if the least significant bit is 1 if ( (encoded_hash_value & 1) == 1) { // if yes: the hash was stored with higher precision, bits p to pPrime were 0 uint8_t additional_rank = pPrime - p; return additional_rank + extractBits(encoded_hash_value, 7, 1); } else { return getRank(encoded_hash_value,p); } } /** * To decode hash from sparse representation: res.idx = getIndex(encoded_hash_value, p); res.rank = getEncodedRank(encoded_hash_value, p, Prime); */ /** * Encode the 64-bit hash code x as an 32-bit integer, to be used in the sparse representation. * * Difference from the algorithm described in the paper: * The index always is in the p most significant bits * * see section 5.3 in Heule et al. * @param x the hash bits * @return encoded hash value */ inline uint32_t encodeHashIn32Bit(uint64_t hash_value, uint8_t pPrime, uint8_t p) { // extract first pPrime bits as index uint32_t idx = (uint32_t)(extractHighBits(hash_value,pPrime) << (32-pPrime)); #ifdef HLL_DEBUG2 cerr << "encoding hash: " << bitset<64>(hash_value) << endl; cerr << "index': " << bitset<32>(idx) << " ( bits from 64 to " << 64-pPrime << "; " << idx << ")" << endl; #endif // are the bits after bit p in index' all 0? if (idx << p == 0) { // compute the additional rank (minimum rank is already p'-p) // the maximal size will be below 2^6=64. We thus combine the 25 bits of the index with 6 bits for the rank, and one bit as flag uint8_t additional_rank = getRank(hash_value, pPrime); // this is rank - (p'-p), as we know that positions p'...p are 0 #ifdef HLL_DEBUG2 cerr << "All zero " << endl; #endif return idx | uint32_t(additional_rank<<1) | 1; } else { // else, return the idx, only - it has enough length to calculate the rank (left-shifted, last bit = 0) assert((idx & 1) == 0); return idx; } } inline void addHashToSparseList(vector& vec, const uint32_t val, const uint8_t pPrime) { //if (sparseList_is_sorted) { auto it = std::lower_bound( vec.begin(), vec.end(), val); // find proper position in descending order if (it == vec.end()) { // position at the end vec.insert( it, val ); // insert before iterator it } else if (*it != val) { // val not in the vector if (extractHighBits(val,pPrime) == extractHighBits(*it,pPrime)) { //the values have the same index // it's pretty unlikely to observe different hashes with the same index // pPrime is 25, which means that the chance of another random number // having the same index is 1/2^25 or about 1/33,500,000. Sparse representation // doesn't go that high, though. // Maybe a hash should be added even if it has the same index? // (TODO: Check how often this occurs and the relative errors of the estimates) if ((*it & 1) == (val & 1)) { // if both are in the same category if ((val & 1) == 1) { // both have ones as lsb - replace if val is greater if (val > *it) *it = val; } else { // both have zeros as lsb - replace if val is smaller if (val < *it) *it = val; } } else if ((val & 1) == 1) { // replace if lsb of val is 1 *it = val; } } else { vec.insert( it, val ); // insert before iterator it } } //} else { // vec.push_back(val); //} } template inline void addHashToSparseList(SET& uset, const uint32_t val, const uint8_t /*pPrime*/) { // this implementation currently does not check if there is a value for an index of length pPrime uset.insert(val); } //////////////////////////////////////////////////////////////////// // Other Flajolet/Heule HLL functions /** * Bias correction factors for specific m's * @param m * @return */ double alpha(uint32_t m) { switch (m) { case 16: return 0.673; case 32: return 0.697; case 64: return 0.709; } // m >= 128 return 0.7213 / (1 + 1.079/double(m)); } /** * Gives the estimated cardinality for m bins, v of which are non-zero * using linear counting of Whang et al., 1990: n_hat = -m ln(v) * @param m number of bins in the matrix * @param v number of non-zero bins * @return */ double linearCounting(const uint32_t m, const uint32_t v) { if (v > m) { throw std::invalid_argument("number of v should not be greater than m"); } return double(m) * log(double(m)/double(v)); } /** * calculate the raw estimate as harmonic mean of the ranks in the register */ inline double calculateRawEstimate(const vector& M) { double inverseSum = 0.0; for (size_t i = 0; i < M.size(); ++i) { inverseSum += 1. / (1ull << M[i]); } return alpha(M.size()) * double(M.size() * M.size()) * 1. / inverseSum; } uint32_t countZeros(vector s) { return (uint32_t)count(s.begin(), s.end(), 0); } /** * Estimate the bias of raw estimate using empirically determined values. * Uses weighted average of the two cells between which the estimate falls. * TODO: Check if nearest neighbor average gives better values, as proposed in the paper * @param est * @return correction value for */ double getEstimateBias(double estimate, uint8_t p) { const vector& rawEstimateTable = rawEstimateData[p-4]; const vector& biasTable = biasData[p-4]; // check if estimate is lower than first entry, or larger than last if (rawEstimateTable.front() >= estimate) { return biasTable.front(); } if (rawEstimateTable.back() <= estimate) { return biasTable.back(); } // get iterator to first element that is not smaller than estimate vector::const_iterator it = lower_bound(rawEstimateTable.begin(),rawEstimateTable.end(),estimate); size_t pos = it - rawEstimateTable.begin(); double e1 = rawEstimateTable[pos-1]; double e2 = rawEstimateTable[pos]; double c = (estimate - e1) / (e2 - e1); D(cerr << "bias correction factor c: (estimate - e1)/ (e2 - e1) = (" << estimate << " - " << e1 << ")/(" << e2 << " - " << e1 << ")" << endl;) D(cerr << "biasTable[" << pos-1 << "]*(1-c) + biasTable[" << pos << "]*c" << endl; ) return biasTable[pos-1]*(1-c) + biasTable[pos]*c; } ///////////////////////////////////////////////////////////////////// // Methods for improved estimator of Ertl et al. /** * returns a 'register histogram' vector C, * where C[i] is the number of elements in M with value i * it's size is q+1 = 64-p+1 * used in Ertl's improved estimator */ vector registerHistogram(const vector& M, uint8_t q) { vector C(q+2, 0); for (size_t i = 0; i < M.size(); ++i) { if (M[i] >= q+1) { cerr << "M["< sparseRegisterHistogram(const SparseListType& sparseList, uint8_t pPrime, uint8_t p, uint8_t q){ vector C(q+2, 0); size_t m = 1 << pPrime; for (const auto& encoded_hash_value : sparseList) { uint8_t rank_val = getEncodedRank(encoded_hash_value, pPrime, p); ++C[rank_val]; --m; } C[0] = m; return C; } /** * Ertl - calculation of sigma correction for 0-registers in M * x is the proportion of 0-registers, thus x \in [0, 1] * sigma := x + sum[from k=1 to Inf] ( x^(2^k) * 2^(k-1) ) */ double sigma(double x) { assert(x >= 0.0 && x <= 1.0); if (x == 1.0) { return std::numeric_limits::infinity(); } double prev_sigma_x; double sigma_x = x; double y = 1.0; do { // loop until sigma_x does not change anymore prev_sigma_x = sigma_x; x *= x; // gives x^(2^k) sigma_x += x * y; y += y; // gives 2^(k-1) } while (sigma_x != prev_sigma_x); return sigma_x; } // modification of the implementation - currently not used double sigma_mod(double x) { assert(x >= 0.0 && x <= 1.0); if (x == 1.0) { return std::numeric_limits::infinity(); } double sigma_x = x; for (double x_sq = x*x, two_exp = 1.0; x_sq > std::numeric_limits::epsilon(); x_sq *= x_sq, two_exp += two_exp) { sigma_x += x_sq * two_exp; } return sigma_x; } /** * Ertl - calculation of tau correction for values higher then q in M * x is the proportion of registers with a value below q in M, thus x \in [0,1] * tau := 1/3 (1 - x - sum[from k=1 to Inf](1-x^(2^(-k))^2 * 2^(-k) )) */ double tau(double x) { assert(x >= 0.0 && x <= 1.0); if (x == 0.0 || x == 1.0) { return 0.0; } double prev_tau_x; double y = 1.0; double tau_x = 1 - x; do { // loop until tau_x does not change anymore prev_tau_x = tau_x; x = std::sqrt(x); // gives x^(2^-k) y /= 2.0; // gives 2^(-k) tau_x -= std::pow(1-x, 2) * y; } while (tau_x != prev_tau_x); return tau_x / 3.0; } ///////////////////////////////////////////////////////////////////// // HyperLogLogPlusMinus class methods template<> HyperLogLogPlusMinus::HyperLogLogPlusMinus(uint8_t precision, bool sparse, uint64_t (*bit_mixer) (uint64_t)): p(precision), m(1< 18 || precision < 4) { throw std::invalid_argument("precision (number of register = 2^precision) must be between 4 and 18"); } if (sparse) { this->sparseList = SparseListType(); // TODO: if SparseListType is changed, initialize with appropriate size this->sparseList.reserve(m/4); } else { this->M = vector(m); } } template HyperLogLogPlusMinus& HyperLogLogPlusMinus::operator= (HyperLogLogPlusMinus&& other) { p = other.p; m = other.m; M = std::move(other.M); n_observed = other.n_observed; sparse = other.sparse; sparseList = std::move(other.sparseList); bit_mixer = other.bit_mixer; return *this; } template HyperLogLogPlusMinus& HyperLogLogPlusMinus::operator= (const HyperLogLogPlusMinus& other) { p = other.p; m = other.m; M = other.M; n_observed = other.n_observed; sparse = other.sparse; sparseList = other.sparseList; bit_mixer = other.bit_mixer; return *this; } template HyperLogLogPlusMinus::HyperLogLogPlusMinus(const HyperLogLogPlusMinus& other): p(other.p), m(other.m), M(other.M), n_observed(other.n_observed), sparse(other.sparse), sparseList(other.sparseList), bit_mixer(other.bit_mixer) { } template HyperLogLogPlusMinus::HyperLogLogPlusMinus(HyperLogLogPlusMinus&& other): p(other.p), m(other.m), M(std::move(other.M)), n_observed(other.n_observed), sparse(other.sparse), sparseList(std::move(other.sparseList)), bit_mixer(other.bit_mixer) { } template<> void HyperLogLogPlusMinus::insert(uint64_t item) { ++ n_observed; // compute hash for item uint64_t hash_value = bit_mixer(item); #ifdef HLL_DEBUG2 cerr << "Value: " << item << "; hash(value): " << hash_value << endl; cerr << bitset<64>(hash_value) << endl; #endif if (sparse && this->sparseList.size() + 1 > this->m/4) { switchToNormalRepresentation(); } if (sparse) { // sparse mode: put the encoded hash into sparse list uint32_t encoded_hash_value = encodeHashIn32Bit(hash_value, pPrime, p); addHashToSparseList(sparseList, encoded_hash_value, pPrime); #ifdef HLL_DEBUG2 cerr << "encoded hash: " << bitset<32>(encoded_hash_value) << endl; assert(getIndex(encoded_hash_value,p) == getIndex(hash_value, p)); assert(getEncodedRank(encoded_hash_value,pPrime,p) == getRank(hash_value, p)); #endif // if the sparseList is too large, switch to normal (register) representation } else { // normal mode // take first p bits as index {x63,...,x64-p} uint32_t idx = getIndex(hash_value, p); // shift those p values off, and count leading zeros of the remaining string {x63-p,...,x0} uint8_t rank = getRank(hash_value, p); // update the register if current rank is bigger if (rank > this->M[idx]) { this->M[idx] = rank; } } } template void HyperLogLogPlusMinus::insert(const vector& items) { for(auto i : items) { this->insert(i); } } // reset to original state template void HyperLogLogPlusMinus::reset() { this->sparse = true; this->sparseList.clear(); // this->M.clear(); } // Convert from sparse representation (using sparseList) to normal (using register) template void HyperLogLogPlusMinus::switchToNormalRepresentation() { if (!sparse) return; D(cerr << "switching to normal representation" << endl;) #ifdef HLL_DEBUG cerr << " est before: " << cardinality() << endl; #endif this->sparse = false; this->M = vector(this->m); addToRegisters(this->sparseList); this->sparseList.clear(); #ifdef HLL_DEBUG cerr << " est after: " << cardinality() << endl; #endif } // add sparseList to the registers of M template void HyperLogLogPlusMinus::addToRegisters(const SparseListType &sparseList) { if (sparse) { cerr << "Cannot add to registers of a sparse HLL" << endl; return; } if (sparseList.size() == 0) { return; } for (auto encoded_hash_value_ptr = sparseList.begin(); encoded_hash_value_ptr != sparseList.end(); ++encoded_hash_value_ptr) { size_t idx = getIndex(*encoded_hash_value_ptr, p); assert(idx < M.size()); uint8_t rank_val = getEncodedRank(*encoded_hash_value_ptr, pPrime, p); if (rank_val > this->M[idx]) { this->M[idx] = rank_val; } } } template uint64_t HyperLogLogPlusMinus::nObserved() const { return n_observed; } template void HyperLogLogPlusMinus::merge(HyperLogLogPlusMinus&& other) { if (this->p != other.p) { throw std::invalid_argument("precisions must be equal"); } if (other.n_observed == 0) return; if (this->n_observed == 0) { n_observed = other.n_observed; sparse = other.sparse; sparseList = std::move(other.sparseList); M = std::move(other.M); } else { n_observed += other.n_observed; if (this->sparse && other.sparse) { // this->merge(static_cast&>(other)); // consider using addHashToSparseList(this->sparseList, val, pPrime) and checking for sizes this->sparseList.insert(other.sparseList.begin(), other.sparseList.end()); } else if (other.sparse) { // other is sparse, but this is not addToRegisters(other.sparseList); } else { if (this->sparse) { this->sparse = false; M = std::move(other.M); addToRegisters(this->sparseList); this->sparseList.clear(); } else { // merge registers for (size_t i = 0; i < other.M.size(); ++i) { if (other.M[i] > this->M[i]) { this->M[i] = other.M[i]; } } } } } } // Merge other HyperLogLogPlusMinus into this one. May convert to normal representation template void HyperLogLogPlusMinus::merge(const HyperLogLogPlusMinus& other) { if (this->p != other.p) { throw std::invalid_argument("precisions must be equal"); } if (other.n_observed == 0) return; if (this->n_observed == 0) { // TODO: Make this more efficient when other is disowned n_observed = other.n_observed; sparse = other.sparse; sparseList = other.sparseList; M = other.M; } else { n_observed += other.n_observed; if (this->sparse && other.sparse) { // consider using addHashToSparseList(this->sparseList, val, pPrime) and checking for sizes this->sparseList.insert(other.sparseList.begin(), other.sparseList.end()); } else if (other.sparse) { // other is sparse, but this is not addToRegisters(other.sparseList); } else { if (this->sparse) { this->sparse = false; M = other.M; addToRegisters(this->sparseList); this->sparseList.clear(); } else { // merge registers for (size_t i = 0; i < other.M.size(); ++i) { if (other.M[i] > this->M[i]) { this->M[i] = other.M[i]; } } } } } } template HyperLogLogPlusMinus& HyperLogLogPlusMinus::operator+=(HyperLogLogPlusMinus&& other) { merge(std::move(other)); return *this; } template HyperLogLogPlusMinus& HyperLogLogPlusMinus::operator+=(const HyperLogLogPlusMinus& other) { merge(other); return *this; } template<> uint64_t HyperLogLogPlusMinus::flajoletCardinality(bool use_sparse_precision) const { vector M = this->M; if (sparse) { if (use_sparse_precision) { return round(linearCounting(mPrime, mPrime-uint32_t(sparseList.size()))); } else{ // For testing purposes. Put sparse list into a standard register M = vector(m, 0); for (const auto& val : sparseList) { size_t idx = getIndex(val, p); assert(idx < M.size()); uint8_t rank_val = getEncodedRank(val, pPrime, p); if (rank_val > M[idx]) { M[idx] = rank_val; } } } } double est = calculateRawEstimate(M); if (est <= 2.5*m) { uint32_t v = countZeros(M); if (v > 0) est = linearCounting(m, v); } /* else if (est > 1/30 * pow(2,64) { // No bias correction - should not run in the problem with 64-bit hashes } */ return (use_n_observed && n_observed < est)? n_observed : round(est); } /** * Improved cardinality estimator of Ertl, 2017 (arXiv, section 4) * Based on the underlying distribution, the estimator employs correction * factors for zero and 'over-subscribed' registers. It does not depend on * emprically defined bias correction values or a switch between linear * counting and loglog estimation * * Formula: * alpha_inf * m^2 * -------------------------------------------------------------------------------------- * ( m * sigma(C_0/m) + sum[from k=1 to q] C_k * 2^(-k) + m * tau(1-C_(q+1)/m) * 2^(-q) */ template<> uint64_t HyperLogLogPlusMinus::ertlCardinality() const { size_t q, m; vector C; if (sparse) { q = 64 - pPrime; m = mPrime; C = sparseRegisterHistogram(sparseList, pPrime, p, q); } else { q = 64 - p; m = this->m; C = registerHistogram(M, q); } D(cerr << "\n1. hist. q=" << q << "; m=" << m << endl;) D(cerr << "2. m * tau(m*(1.0 - "<= 1; --k) { est_denominator += C[k]; est_denominator *= 0.5; } D(cerr << est_denominator << endl; ) D(cerr << "4. sigma(" << C[0] << "/m) = " << sigma(double(C[0])/double(m)); ) est_denominator += m * sigma(double(C[0])/double(m)); D(cerr << endl;) double m_sq_alpha_inf = (m / (2.0*std::log(2))) * m; double est = m_sq_alpha_inf / est_denominator; return (use_n_observed && n_observed < est)? n_observed : round(est); } template<> uint64_t HyperLogLogPlusMinus::heuleCardinality(bool correct_bias) const { if (p > 18) { cerr << "Heule HLL++ estimate only works with value of p up to 18 - returning Ertl estimate." << endl; return(ertlCardinality()); } if (sparse) { // if we are 'sparse', then use linear counting with increased precision pPrime uint64_t lc_estimate = round(linearCounting(mPrime, mPrime-uint32_t(sparseList.size()))); D(cerr << "sparse representation - return linear counting estimate("<= 0); if (lc_estimate <= double(threshold[p-4])) { //D(cerr << "below threshold - return it " << endl;) return lc_estimate; } D(cerr << "above threshold of " << threshold[p-4] << " - calculate raw estimate " << endl;) } // calculate raw estimate on registers //double est = alpha(m) * harmonicMean(M, m); double est = calculateRawEstimate(M); D(cerr << "raw estimate: " << est << endl;) // correct for biases if estimate is smaller than 5m if (correct_bias && est <= double(m)*5.0) { D(cerr << "correct bias; subtract " << getEstimateBias(est, p) << endl;) assert(est > getEstimateBias(est, p)); est -= getEstimateBias(est, p); } return (use_n_observed && n_observed < est)? n_observed : round(est); } template<> uint64_t HyperLogLogPlusMinus::cardinality() const { return ertlCardinality(); } template<> uint64_t HyperLogLogPlusMinus::size() const { return cardinality(); } ///////////////////////////////////////////////////////////////////// // Hash and other functions /** * from Numerical Recipes, 3rd Edition, p 352 * Returns hash of u as a 64-bit integer. * */ inline uint64_t ranhash (uint64_t u) { uint64_t v = u * 3935559000370003845 + 2691343689449507681; v ^= v >> 21; v ^= v << 37; v ^= v >> 4; v *= 4768777513237032717; v ^= v << 20; v ^= v >> 41; v ^= v << 5; return v; } /** * Avalanche mixer/finalizer from MurMurHash3 * https://github.com/aappleby/smhasher * from https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp */ uint64_t murmurhash3_finalizer (uint64_t key) { key += 1; // murmurhash returns a hash value of 0 for the key 0 - avoid that. key ^= key >> 33; key *= 0xff51afd7ed558ccd; key ^= key >> 33; key *= 0xc4ceb9fe1a85ec53; key ^= key >> 33; return key; } /* 64-bit mixer developed by Thomas Wang Proposed for HLL by https://github.com/dnbaker/hll https://gist.github.com/badboy/6267743 */ inline uint64_t wang_mixer(uint64_t key) { key = (~key) + (key << 21); // key = (key << 21) - key - 1; key = key ^ (key >> 24); key = (key + (key << 3)) + (key << 8); // key * 265 key = key ^ (key >> 14); key = (key + (key << 2)) + (key << 4); // key * 21 key = key ^ (key >> 28); key = key + (key << 31); return key; } /* inline void merge_lists(vector& vec1, const vector& vec2) { auto it = std::lower_bound( vec.begin(), vec.end(), val); // find proper position in descending order if (it == vec.end()) { vec.insert( it, val ); // insert before iterator it } } */ //#else //#endif template struct NoHash { size_t operator() (const T &u) const { return u; } }; // Always compile 64-bit HLL class template class HyperLogLogPlusMinus; kraken2-2.1.2/src/hyperloglogplus.h000066400000000000000000000100741404614725400172610ustar00rootroot00000000000000/* * Copyright 2017-2018, Florian Breitwieser * * This file was originally developed for the KrakenUniq taxonomic classification system. */ /* * Implementation of 64-bit HyperLogLog algorithm by Flajolet et al., * with sparse mode for increased precision with low cardinalities (Stefan Heule et al.), and * an improved estimator that does not rely on empirical bias correction data (Otmar Ertl) */ #ifndef HYPERLOGLOGPLUS_H_ #define HYPERLOGLOGPLUS_H_ #include "kraken2_headers.h" using namespace std; //#define HLL_DEBUG //#define HLL_DEBUG2 #ifdef HLL_DEBUG #define D(x) x #else #define D(x) #endif // 64-bit Mixer/finalizer from MurMurHash3 https://github.com/aappleby/smhasher uint64_t murmurhash3_finalizer (uint64_t key); // Heule et al. encode the sparse list with variable length encoding // see section 5.3.2. This implementation just uses a sorted vector or unordered_set. typedef unordered_set SparseListType; // Other possible SparseList types: // // typedef vector SparseListType; // The sorted vector implementation is pretty inefficient currently, as the vector // is always kept sorted. // //typedef unordered_set > SparseListType; // No real performance gain from using the hash value directly - and probably problems // with the bucket assignment, since unordered_set expects size_t hashes /** * HyperLogLogPlusMinus class for counting the number of unique 64-bit values in stream * Note that only HASH=uint64_t is implemented. */ template class HyperLogLogPlusMinus { private: uint8_t p; // precision, set in constructor size_t m = 1 << p; // number of registers vector M; // registers, size m uint64_t n_observed = 0; bool sparse; // sparse representation of the data? SparseListType sparseList; HASH (*bit_mixer) (uint64_t); // sparse versions of p and m static const uint8_t pPrime = 25; // precision when using a sparse representation // which encodes the rank + index in 32 bits: // 25 bits for index + // 6 bits for rank + // 1 flag bit indicating if bits p..pPrime are 0 static const uint32_t mPrime = 1 << pPrime; // 2^pPrime public: bool use_n_observed = true; // return min(estimate, n_observed) instead of estimate // Construct HLL with precision bits HyperLogLogPlusMinus(uint8_t precision=12, bool sparse=true, HASH (*bit_mixer) (uint64_t) = murmurhash3_finalizer); HyperLogLogPlusMinus(const HyperLogLogPlusMinus& other); HyperLogLogPlusMinus(HyperLogLogPlusMinus&& other); HyperLogLogPlusMinus& operator= (HyperLogLogPlusMinus&& other); HyperLogLogPlusMinus& operator= (const HyperLogLogPlusMinus& other); ~HyperLogLogPlusMinus() {}; void reset(); // Note: sets sparse=true // Add items or other HLL to this sketch void insert(uint64_t item); void insert(const vector& items); // Merge another sketch into this one // TODO: assumes equal bit_mixers! but does not check that void merge(HyperLogLogPlusMinus&& other); void merge(const HyperLogLogPlusMinus& other); HyperLogLogPlusMinus& operator+=(HyperLogLogPlusMinus&& other); HyperLogLogPlusMinus& operator+=(const HyperLogLogPlusMinus& other); // Calculate cardinality estimates uint64_t cardinality() const; // returns ertlCardinality() uint64_t size() const; // returns ertlCardinality() // HLL++ estimator of Heule et al., 2015. Uses empirical bias correction factors uint64_t heuleCardinality(bool correct_bias = true) const; // Improved estimator of Ertl, 2017. Does not rely on empirical data uint64_t ertlCardinality() const; // Flajolet's cardinality estimator without bias correction uint64_t flajoletCardinality(bool use_sparse_precision = true) const; uint64_t nObserved() const; private: void switchToNormalRepresentation(); void addToRegisters(const SparseListType &sparseList); }; #endif /* HYPERLOGLOGPLUS_H_ */ kraken2-2.1.2/src/kraken2_data.h000066400000000000000000000017571404614725400163600ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #ifndef KRAKEN2_KRAKEN2_DATA_H_ #define KRAKEN2_KRAKEN2_DATA_H_ #include "kraken2_headers.h" #include "readcounts.h" namespace kraken2 { struct IndexOptions { size_t k; size_t l; uint64_t spaced_seed_mask; uint64_t toggle_mask; bool dna_db; uint64_t minimum_acceptable_hash_value; int revcom_version; // Fix bug from before K2.0.8 int db_version; // To allow for future database structural changes int db_type; // To allow for future use of other data structures }; typedef uint64_t taxid_t; const taxid_t TAXID_MAX = (taxid_t) -1; typedef std::unordered_map taxon_counts_t; #ifdef EXACT_COUNTING typedef ReadCounts> READCOUNTER; #else typedef ReadCounts> READCOUNTER; #endif typedef std::unordered_map taxon_counters_t; } #endif kraken2-2.1.2/src/kraken2_headers.h000066400000000000000000000012751404614725400170550ustar00rootroot00000000000000#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "omp_hack.h" #ifndef SIZE_MAX #define SIZE_MAX ((size_t) -1) #endif #ifndef UINT64_MAX #define UINT64_MAX ((uint64_t) -1) #endif kraken2-2.1.2/src/kv_store.h000066400000000000000000000011761404614725400156610ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #ifndef KRAKEN2_KV_STORE_H_ #define KRAKEN2_KV_STORE_H_ #include "kraken2_headers.h" namespace kraken2 { typedef uint64_t hkey_t; typedef uint32_t hvalue_t; class KeyValueStore { public: virtual hvalue_t Get(hkey_t key) const = 0; virtual ~KeyValueStore() { } }; uint64_t inline MurmurHash3(hkey_t key) { uint64_t k = (uint64_t) key; k ^= k >> 33; k *= 0xff51afd7ed558ccd; k ^= k >> 33; k *= 0xc4ceb9fe1a85ec53; k ^= k >> 33; return k; } } // end namespace #endif kraken2-2.1.2/src/lookup_accession_numbers.cc000066400000000000000000000073241404614725400212570ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #include "kraken2_headers.h" #include "mmap_file.h" #include "utilities.h" using namespace kraken2; using std::string; using std::unordered_map; using std::vector; int main(int argc, char **argv) { if (argc < 3) errx(EX_USAGE, "Usage: lookup_accession_numbers "); unordered_map> target_lists; std::ifstream lookup_list_file(argv[1]); string line; while (getline(lookup_list_file, line)) { auto fields = SplitString(line, "\t", 2); string seqid = fields[0]; string accnum = fields[1]; target_lists[accnum].push_back(seqid); } lookup_list_file.close(); auto initial_target_count = target_lists.size(); MMapFile accmap_file; if (isatty(fileno(stderr))) std::cerr << "\rFound 0/" << initial_target_count << " targets..."; uint64_t accessions_searched = 0; for (int i = 2; i < argc; i++) { if (target_lists.empty()) // Stop processing files if we've found all we need break; accmap_file.OpenFile(argv[i]); char *ptr = accmap_file.fptr(); // Skip header line char *lf_ptr = (char *) memchr(ptr, '\n', accmap_file.filesize()); if (lf_ptr != nullptr) ptr = lf_ptr + 1; while ((size_t)(ptr - accmap_file.fptr()) < accmap_file.filesize()) { lf_ptr = (char *) memchr(ptr, '\n', accmap_file.filesize() - (ptr - accmap_file.fptr())); if (lf_ptr == nullptr) { warnx("expected EOL not found at EOF in %s", argv[i]); break; } char *tab_ptr = (char *) memchr(ptr, '\t', lf_ptr - ptr); if (tab_ptr == nullptr) { warnx("expected TAB not found in %s", argv[i]); break; } string accnum(ptr, tab_ptr - ptr); accessions_searched++; if (target_lists.count(accnum)) { for (int j = 0; j < 2; j++) { ptr = tab_ptr + 1; tab_ptr = (char *) memchr(ptr, '\t', lf_ptr - ptr); if (tab_ptr == nullptr) { warnx("expected TAB not found in %s", argv[i]); break; } } if (tab_ptr == nullptr) break; string taxid(ptr, tab_ptr - ptr); for (auto &seqid : target_lists[accnum]) std::cout << seqid << "\t" << taxid << std::endl; target_lists.erase(accnum); if (isatty(fileno(stderr))) std::cerr << "\rFound " << (initial_target_count - target_lists.size()) << "/" << initial_target_count << " targets, searched through " << accessions_searched << " accession IDs..."; if (target_lists.empty()) // Stop processing file if we've found all we need break; } if (accessions_searched % 10000000 == 0 && isatty(fileno(stderr))) std::cerr << "\rFound " << (initial_target_count - target_lists.size()) << "/" << initial_target_count << " targets, searched through " << accessions_searched << " accession IDs..."; ptr = lf_ptr + 1; } accmap_file.CloseFile(); } if (isatty(fileno(stderr))) std::cerr << "\r"; std::cerr << "Found " << (initial_target_count - target_lists.size()) << "/" << initial_target_count << " targets, searched through " << accessions_searched << " accession IDs, search complete." << std::endl; if (! target_lists.empty()) { std::cerr << "lookup_accession_numbers: " << target_lists.size() << "/" << initial_target_count << " accession numbers remain unmapped, see " << "unmapped.txt in DB directory" << std::endl; std::ofstream ofs("unmapped.txt"); for (auto &kv_pair : target_lists) ofs << kv_pair.first << std::endl; } return 0; } kraken2-2.1.2/src/mmap_file.cc000066400000000000000000000056021404614725400161120ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #include "mmap_file.h" using std::string; namespace kraken2 { MMapFile::MMapFile() { valid_ = false; fptr_ = NULL; filesize_ = 0; fd_ = -1; } void MMapFile::OpenFile(const string &filename_str, int mode, int prot_flags, int map_flags, size_t size) { const char *filename = filename_str.c_str(); OpenFile(filename, mode, map_flags, prot_flags, size); } void MMapFile::OpenFile(const char *filename, int mode, int prot_flags, int map_flags, size_t size) { if (mode & O_APPEND || (mode & O_ACCMODE) == O_WRONLY) errx(EX_SOFTWARE, "illegal mode passed to MMapFile"); if (prot_flags < 0) prot_flags = (mode & O_ACCMODE) == O_RDONLY ? PROT_READ : PROT_READ | PROT_WRITE; if (map_flags < 0) map_flags = (mode & O_ACCMODE) == O_RDONLY ? MAP_PRIVATE : MAP_SHARED; fd_ = open(filename, mode, 0666); if (fd_ < 0) err(EX_OSERR, "unable to open %s", filename); if (mode & O_CREAT) { if (lseek(fd_, size - 1, SEEK_SET) < 0) err(EX_OSERR, "unable to lseek (%s)", filename); if (write(fd_, "", 1) < 0) err(EX_OSERR, "write error (%s)", filename); filesize_ = size; } else { struct stat sb; if (fstat(fd_, &sb) < 0) err(EX_OSERR, "unable to fstat %s", filename); filesize_ = sb.st_size; } fptr_ = (char *) mmap(0, filesize_, prot_flags, map_flags, fd_, 0); if (fptr_ == MAP_FAILED) { err(EX_OSERR, "unable to mmap %s", filename); } valid_ = true; } // Basically a cat operation, loads file into OS cache // I don't use MAP_POPULATE to do this because of portability issues void MMapFile::LoadFile() { int thread_ct = 1; int thread = 0; #ifdef _OPENMP int old_thread_ct = omp_get_max_threads(); // Don't use more than 4 threads, don't want to hammer disk if (old_thread_ct > 4) omp_set_num_threads(4); thread_ct = omp_get_max_threads(); #endif size_t page_size = getpagesize(); char buf[thread_ct][page_size]; #pragma omp parallel { #ifdef _OPENMP thread = omp_get_thread_num(); #endif #pragma omp for schedule(dynamic) for (size_t pos = 0; pos < filesize_; pos += page_size) { size_t this_page_size = filesize_ - pos; if (this_page_size > page_size) this_page_size = page_size; memcpy(buf[thread], fptr_ + pos, this_page_size); } } #ifdef _OPENMP omp_set_num_threads(old_thread_ct); #endif } char * MMapFile::fptr() { return valid_ ? fptr_ : NULL; } size_t MMapFile::filesize() { return valid_ ? filesize_ : 0; } MMapFile::~MMapFile() { CloseFile(); } void MMapFile::SyncFile() { msync(fptr_, filesize_, MS_SYNC); } void MMapFile::CloseFile() { if (! valid_) return; SyncFile(); munmap(fptr_, filesize_); close(fd_); valid_ = false; } } // namespace kraken2-2.1.2/src/mmap_file.h000066400000000000000000000015261404614725400157550ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #ifndef KRAKEN2_MMAP_FILE_H_ #define KRAKEN2_MMAP_FILE_H_ #include "kraken2_headers.h" namespace kraken2 { class MMapFile { public: MMapFile(); ~MMapFile(); void OpenFile(const std::string &filename, int mode = O_RDONLY, int map_flags = -1, int prot_flags = -1, size_t size = 0); void OpenFile(const char *filename, int mode = O_RDONLY, int map_flags = -1, int prot_flags = -1, size_t size = 0); char *fptr(); size_t filesize(); void LoadFile(); void SyncFile(); void CloseFile(); private: MMapFile(const MMapFile &rhs); MMapFile& operator=(const MMapFile &rhs); bool valid_; int fd_; char *fptr_; size_t filesize_; }; } #endif kraken2-2.1.2/src/mmscanner.cc000066400000000000000000000174631404614725400161540ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #include "mmscanner.h" #include using std::string; using std::vector; namespace kraken2 { void MinimizerScanner::set_lookup_table_character(char c, uint8_t val) { lookup_table_[(int) c] = val; lookup_table_[tolower(c)] = val; } MinimizerScanner::MinimizerScanner(ssize_t k, ssize_t l, uint64_t spaced_seed_mask, bool dna_sequence, uint64_t toggle_mask, int revcom_version) : str_(nullptr), k_(k), l_(l), str_pos_(0), start_(0), finish_(0), spaced_seed_mask_(spaced_seed_mask), dna_(dna_sequence), toggle_mask_(toggle_mask), loaded_ch_(0), last_ambig_(0), revcom_version_(revcom_version) { if (l_ > (ssize_t) ((sizeof(uint64_t) * 8 - 1) / (dna_ ? BITS_PER_CHAR_DNA : BITS_PER_CHAR_PRO))) errx(EX_SOFTWARE, "l exceeds size limits for minimizer %s scanner", dna_ ? "nucleotide" : "protein"); lmer_mask_ = 1; lmer_mask_ <<= (l_ * (dna_ ? BITS_PER_CHAR_DNA : BITS_PER_CHAR_PRO)); lmer_mask_--; toggle_mask_ &= lmer_mask_; if (finish_ == SIZE_MAX) finish_ = str_->size(); if ((ssize_t) (finish_ - start_) + 1 < l_) // Invalidate scanner if interval < 1 l-mer str_pos_ = finish_; for (int i = 0; i < UINT8_MAX + 1; i++) lookup_table_[i] = UINT8_MAX; if (dna_) { set_lookup_table_character('A', 0x00); set_lookup_table_character('C', 0x01); set_lookup_table_character('G', 0x02); set_lookup_table_character('T', 0x03); } else { // Reduced alphabet uses 15-letter alphabet from AD Solis (2015), // in Proteins (doi:10.1002/prot.24936) // This means that the ambiguous codes B (N/D) and Z (E/Q) aren't // usable here because they span multiple groups. Although J (I/L) // could be used because I & L are in the same group, we treat it as // we do B, Z, and X for consistency and to ease future changes to // the code base. // stop codons/rare amino acids set_lookup_table_character('*', 0x00); set_lookup_table_character('U', 0x00); set_lookup_table_character('O', 0x00); // alanine set_lookup_table_character('A', 0x01); // asparagine, glutamine, serine set_lookup_table_character('N', 0x02); set_lookup_table_character('Q', 0x02); set_lookup_table_character('S', 0x02); // cysteine set_lookup_table_character('C', 0x03); // aspartic acid, glutamic acid set_lookup_table_character('D', 0x04); set_lookup_table_character('E', 0x04); // phenylalanine set_lookup_table_character('F', 0x05); // glycine set_lookup_table_character('G', 0x06); // histidine set_lookup_table_character('H', 0x07); // isoleucine, leucine set_lookup_table_character('I', 0x08); set_lookup_table_character('L', 0x08); // lysine set_lookup_table_character('K', 0x09); // proline set_lookup_table_character('P', 0x0a); // arginine set_lookup_table_character('R', 0x0b); // methionine, valine set_lookup_table_character('M', 0x0c); set_lookup_table_character('V', 0x0c); // threonine set_lookup_table_character('T', 0x0d); // tryptophan set_lookup_table_character('W', 0x0e); // tyrosine set_lookup_table_character('Y', 0x0f); } } void MinimizerScanner::LoadSequence(const string &seq, size_t start, size_t finish) { str_ = &seq; start_ = start; finish_ = finish; str_pos_ = start_; if (finish_ > str_->size()) finish_ = str_->size(); if ((ssize_t) (finish_ - start_) + 1 < l_) // Invalidate scanner if interval < 1 l-mer str_pos_ = finish_; queue_.clear(); queue_pos_ = 0; loaded_ch_ = 0; last_minimizer_ = ~0; last_ambig_ = 0; } uint64_t *MinimizerScanner::NextMinimizer() { if (str_pos_ >= finish_) // Abort if we've exhausted string interval return nullptr; bool changed_minimizer = false; auto bits_per_char = dna_ ? BITS_PER_CHAR_DNA : BITS_PER_CHAR_PRO; auto ambig_code = (1u << bits_per_char) - 1; while (! changed_minimizer) { // Incorporate next character (and more if needed to fill l-mer) if (loaded_ch_ == l_) loaded_ch_--; while (loaded_ch_ < l_ && str_pos_ < finish_) { // char loading loop loaded_ch_++; lmer_ <<= bits_per_char; last_ambig_ <<= bits_per_char; auto lookup_code = lookup_table_[ (int) (*str_)[str_pos_++] ]; if (lookup_code == UINT8_MAX) { queue_.clear(); queue_pos_ = 0; lmer_ = 0; loaded_ch_ = 0; last_ambig_ |= ambig_code; } else lmer_ |= lookup_code; lmer_ &= lmer_mask_; last_ambig_ &= lmer_mask_; // If we haven't filled up first k-mer, don't return // Otherwise, if l-mer is incomplete, still return // If l-mer is complete, go through queue management // If we don't have a full l-mer, there's no point in // queue management for minimizer calculation if ((str_pos_ - start_) >= (size_t) k_ && loaded_ch_ < l_) return &last_minimizer_; } // end character loading loop if (loaded_ch_ < l_) // Abort if we've exhausted string interval w/o return nullptr; // filling the l-mer uint64_t canonical_lmer = dna_ ? canonical_representation(lmer_, l_) : lmer_; if (spaced_seed_mask_) canonical_lmer &= spaced_seed_mask_; uint64_t candidate_lmer = canonical_lmer ^ toggle_mask_; if (k_ == l_) { // Short-circuit queue work last_minimizer_ = candidate_lmer ^ toggle_mask_; return &last_minimizer_; } // Sliding window minimum calculation while (! queue_.empty() && queue_.back().candidate > candidate_lmer) queue_.pop_back(); MinimizerData data = { candidate_lmer, queue_pos_ }; if (queue_.empty() && queue_pos_ >= k_ - l_) { // Empty queue means front will change // Minimizer will change iff we've processed enough l-mers changed_minimizer = true; } queue_.push_back(data); // expire an l-mer not in the current window if (queue_.front().pos < queue_pos_ - k_ + l_) { queue_.erase(queue_.begin()); // Change in front means minimizer changed changed_minimizer = true; } // Change from no minimizer (beginning of sequence/near ambig. char) if (queue_pos_ == k_ - l_) changed_minimizer = true; queue_pos_++; // Return only if we've read in at least one k-mer's worth of chars if (str_pos_ >= (size_t) k_) { break; } } // end while ! changed_minimizer assert(! queue_.empty()); last_minimizer_ = queue_.front().candidate ^ toggle_mask_; return &last_minimizer_; } // Adapted for 64-bit DNA use from public domain code at: // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel uint64_t MinimizerScanner::reverse_complement(uint64_t kmer, uint8_t n) { // Reverse bits (leaving bit pairs - nucleotides - intact) // swap consecutive pairs kmer = ((kmer & 0xCCCCCCCCCCCCCCCCUL) >> 2) | ((kmer & 0x3333333333333333UL) << 2); // swap consecutive nibbles kmer = ((kmer & 0xF0F0F0F0F0F0F0F0UL) >> 4) | ((kmer & 0x0F0F0F0F0F0F0F0FUL) << 4); // swap consecutive bytes kmer = ((kmer & 0xFF00FF00FF00FF00UL) >> 8) | ((kmer & 0x00FF00FF00FF00FFUL) << 8); // swap consecutive byte pairs kmer = ((kmer & 0xFFFF0000FFFF0000UL) >> 16) | ((kmer & 0x0000FFFF0000FFFFUL) << 16); // swap halves of 64-bit word kmer = ( kmer >> 32 ) | ( kmer << 32); // Then complement if (revcom_version_ == 0) // This branch present to maintain backwards compatibility with old DBs return (~kmer) & ((1ull << (n * 2)) - 1); else return ((~kmer) >> (sizeof(kmer) * CHAR_BIT - n * 2)) & ((1ull << (n * 2)) - 1); } uint64_t MinimizerScanner::canonical_representation(uint64_t kmer, uint8_t n) { uint64_t revcom = reverse_complement(kmer, n); return kmer < revcom ? kmer : revcom; } } // end namespace kraken2-2.1.2/src/mmscanner.h000066400000000000000000000042161404614725400160060ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #ifndef KRAKEN2_MMSCANNER_H_ #define KRAKEN2_MMSCANNER_H_ #include "kraken2_headers.h" namespace kraken2 { const uint64_t DEFAULT_TOGGLE_MASK = 0xe37e28c4271b5a2dULL; const uint64_t DEFAULT_SPACED_SEED_MASK = 0; const int BITS_PER_CHAR_DNA = 2; const int BITS_PER_CHAR_PRO = 4; const int CURRENT_REVCOM_VERSION = 1; struct MinimizerData { uint64_t candidate; ssize_t pos; }; class MinimizerScanner { public: // Create scanner for seq over interval [start, finish) // Will report length l minimizers for all k-mers in the interval // Minimizers ordered by XORing candidates w/ toggle_mask // toggle_mask == 0 implies lexicographical ordering MinimizerScanner(ssize_t k, ssize_t l, uint64_t spaced_seed_mask = DEFAULT_SPACED_SEED_MASK, bool dna_sequence = true, uint64_t toggle_mask = DEFAULT_TOGGLE_MASK, int revcom_version = CURRENT_REVCOM_VERSION); void LoadSequence(const std::string &seq, size_t start = 0, size_t finish = SIZE_MAX); uint64_t *NextMinimizer(); // Return last minimizer, only valid if NextMinimizer last returned non-NULL uint64_t last_minimizer() const { return last_minimizer_; } ssize_t k() const { return k_; } ssize_t l() const { return l_; } bool is_dna() const { return dna_; } bool is_ambiguous() const { return (queue_pos_ < k_ - l_) || (!! last_ambig_); } private: uint64_t reverse_complement(uint64_t kmer, uint8_t n); uint64_t canonical_representation(uint64_t kmer, uint8_t n); void set_lookup_table_character(char ch, uint8_t val); const std::string *str_; // pointer to sequence ssize_t k_; ssize_t l_; size_t str_pos_, start_, finish_; uint64_t spaced_seed_mask_; bool dna_; uint64_t toggle_mask_; uint64_t lmer_; uint64_t lmer_mask_; uint64_t last_minimizer_; ssize_t loaded_ch_; std::vector queue_; ssize_t queue_pos_; uint64_t last_ambig_; uint8_t lookup_table_[UINT8_MAX + 1]; const int revcom_version_; }; } #endif kraken2-2.1.2/src/mmtest.cc000066400000000000000000000005051404614725400154670ustar00rootroot00000000000000#include "kraken2_headers.h" #include "mmscanner.h" using namespace std; using namespace kraken2; int main() { string seq = "ACGATCGACGACG"; MinimizerScanner scanner(10, 5, 0); scanner.LoadSequence(seq); uint64_t *mmp; while ((mmp = scanner.NextMinimizer()) != nullptr) printf("%016x\n", *mmp); return 0; } kraken2-2.1.2/src/omp_hack.cc000066400000000000000000000017201404614725400157370ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #ifndef _OPENMP #warning "OpenMP not found, multi-threading will be DISABLED!" #warning "To fix this, please make sure your GCC installation uses OpenMP." // This file defines datatypes and declares functions supplied by the // omp.h file to allow compilation on systems that do not have OpenMP // supported by the installed GCC. Note that these functions are // effectively no-op functions that will allow single-threaded operation only. typedef int omp_lock_t; int omp_get_thread_num() { return 0; } int omp_get_max_threads() { return 1; } void omp_set_num_threads(int num) { } void omp_init_lock(omp_lock_t *lock) { } void omp_destroy_lock(omp_lock_t *lock) { } void omp_set_lock(omp_lock_t *lock) { } void omp_unset_lock(omp_lock_t *lock) { } int omp_test_lock(omp_lock_t *lock) { return 1; } #endif // defined _OPENMP kraken2-2.1.2/src/omp_hack.h000066400000000000000000000015461404614725400156070ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #ifndef KRAKEN2_OMP_HACK_H_ #define KRAKEN2_OMP_HACK_H_ #ifdef _OPENMP #include #else // This file defines a datatype and declares functions supplied by the // omp.h file to allow compilation on systems that do not have OpenMP // supported by the installed GCC. Note that these functions are // effectively no-op functions that will allow single-threaded operation only. typedef int omp_lock_t; int omp_get_thread_num(); int omp_get_max_threads(); void omp_set_num_threads(int num); void omp_init_lock(omp_lock_t *lock); void omp_destroy_lock(omp_lock_t *lock); void omp_set_lock(omp_lock_t *lock); void omp_unset_lock(omp_lock_t *lock); int omp_test_lock(omp_lock_t *lock); #endif // defined _OPENMP #endif kraken2-2.1.2/src/readcounts.h000066400000000000000000000054641404614725400162000ustar00rootroot00000000000000/* * Copyright 2017-2018, Florian Breitwieser * * This file was originally developed for the KrakenUniq taxonomic classification system. */ // Note: This class provides counter information for read and k-mer data // as well as allowing counting/estimation of distinct k-mers via a container // type that is passed in. #ifndef KRAKEN2_READCOUNTS_H #define KRAKEN2_READCOUNTS_H #include "hyperloglogplus.h" #include namespace kraken2 { template class ReadCounts { public: uint64_t readCount() const { return n_reads; } void incrementReadCount() { ++n_reads; } uint64_t kmerCount() const { return n_kmers; } uint64_t distinctKmerCount() const; // to be implemented for each CONTAINER ReadCounts() : n_reads(0), n_kmers(0) { } ReadCounts(uint64_t _n_reads, uint64_t _n_kmers) : n_reads(_n_reads), n_kmers(_n_kmers) { } ReadCounts(const ReadCounts& other) : n_reads(other.n_reads), n_kmers(other.n_kmers), kmers(other.kmers) { } ReadCounts(ReadCounts&& other) : n_reads(other.n_reads), n_kmers(other.n_kmers), kmers(std::move(other.kmers)) { } ReadCounts& operator=(const ReadCounts& other) { n_reads = other.n_reads; n_kmers = other.n_kmers; kmers = other.kmers; return *this; } ReadCounts& operator=(ReadCounts&& other) { n_reads = other.n_reads; n_kmers = other.n_kmers; kmers = std::move(other.kmers); return *this; } void add_kmer(uint64_t kmer) { ++n_kmers; kmers.insert(kmer); } ReadCounts& operator+=(const ReadCounts& other) { n_reads += other.n_reads; n_kmers += other.n_kmers; kmers += other.kmers; return *this; } ReadCounts& operator+=(ReadCounts&& other) { n_reads += other.n_reads; n_kmers += other.n_kmers; kmers += std::move(other.kmers); return *this; } bool operator<(const ReadCounts& other) { if (n_reads < other.n_reads) { return true; } if (n_reads == other.n_reads && n_kmers < other.n_kmers) { return true; } return false; } private: uint64_t n_reads; uint64_t n_kmers; CONTAINER kmers; // distinct k-mer count per taxon }; // Overload operator += for set, so that it can be used for merging template unordered_set& operator+=(unordered_set& left, const unordered_set& right) { left.insert(right.begin(), right.end()); return left; } template set& operator+=(set& left, const set& right) { left.insert(right.begin(), right.end()); return left; } template uint64_t ReadCounts< T >::distinctKmerCount() const { // DEW: changed cardinality() to size() to enable use of STL containers return kmers.size(); } } #endif kraken2-2.1.2/src/reports.cc000066400000000000000000000157161404614725400156660ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #include "reports.h" using std::vector; using std::string; using std::ofstream; using std::map; namespace kraken2 { taxon_counts_t GetCladeCounts(Taxonomy &tax, taxon_counts_t &call_counts) { taxon_counts_t clade_counts; for (auto &kv_pair : call_counts) { auto taxid = kv_pair.first; auto count = kv_pair.second; while (taxid) { clade_counts[taxid] += count; taxid = tax.nodes()[taxid].parent_id; } } return clade_counts; } taxon_counters_t GetCladeCounters(Taxonomy &tax, taxon_counters_t &call_counters) { taxon_counters_t clade_counters; for (auto &kv_pair : call_counters) { auto taxid = kv_pair.first; auto counter = kv_pair.second; while (taxid) { clade_counters[taxid] += counter; taxid = tax.nodes()[taxid].parent_id; } } return clade_counters; } void PrintMpaStyleReportLine(ofstream &ofs, uint64_t clade_count, const string &taxonomy_line) { ofs << taxonomy_line << "\t" << clade_count << std::endl; } void MpaReportDFS(taxid_t taxid, ofstream &ofs, bool report_zeros, Taxonomy &taxonomy, taxon_counts_t &clade_counts, vector &taxonomy_names) { // Clade count of 0 means all subtree nodes have clade count of 0 if (! report_zeros && clade_counts[taxid] == 0) return; TaxonomyNode node = taxonomy.nodes()[taxid]; string rank = taxonomy.rank_data() + node.rank_offset; char rank_code = '\0'; if (rank == "superkingdom") { rank_code = 'd'; } else if (rank == "kingdom") { rank_code = 'k'; } else if (rank == "phylum") { rank_code = 'p'; } else if (rank == "class") { rank_code = 'c'; } else if (rank == "order") { rank_code = 'o'; } else if (rank == "family") { rank_code = 'f'; } else if (rank == "genus") { rank_code = 'g'; } else if (rank == "species") { rank_code = 's'; } if (rank_code) { string name = ""; name += rank_code; name += "__"; name += (taxonomy.name_data() + node.name_offset); taxonomy_names.push_back(name); string taxonomy_line = ""; for (auto &str : taxonomy_names) taxonomy_line += str + "|"; taxonomy_line.pop_back(); PrintMpaStyleReportLine(ofs, clade_counts[taxid], taxonomy_line); } auto child_count = node.child_count; if (child_count != 0) { vector children(child_count); for (auto i = 0u; i < child_count; i++) { children[i] = node.first_child + i; } // Sorting child IDs by descending order of clade counts sort(children.begin(), children.end(), [&](const uint64_t &a, const uint64_t &b) { return clade_counts[a] > clade_counts[b]; } ); for (auto child : children) { MpaReportDFS(child, ofs, report_zeros, taxonomy, clade_counts, taxonomy_names); } } if (rank_code) taxonomy_names.pop_back(); } void ReportMpaStyle(string filename, bool report_zeros, Taxonomy &taxonomy, taxon_counters_t &call_counters) { taxon_counts_t call_counts; for (auto &kv_pair : call_counters) { call_counts[kv_pair.first] = kv_pair.second.readCount(); } taxon_counts_t clade_counts = GetCladeCounts(taxonomy, call_counts); ofstream ofs(filename); vector taxonomy_names; MpaReportDFS(1, ofs, report_zeros, taxonomy, clade_counts, taxonomy_names); } void PrintKrakenStyleReportLine(ofstream &ofs, bool report_kmer_data, uint64_t total_seqs, READCOUNTER clade_counter, READCOUNTER taxon_counter, const string &rank_str, uint32_t taxid, const string &sci_name, int depth) { char pct_buffer[10] = ""; sprintf(pct_buffer, "%6.2f", 100.0 * clade_counter.readCount() / total_seqs); ofs << pct_buffer << "\t" << clade_counter.readCount() << "\t" << taxon_counter.readCount() << "\t"; if (report_kmer_data) { ofs << clade_counter.kmerCount() << "\t" << clade_counter.distinctKmerCount() << "\t"; } ofs << rank_str << "\t" << taxid << "\t"; for (auto i = 0; i < depth; i++) ofs << " "; ofs << sci_name << std::endl; } // Depth-first search of taxonomy tree, reporting info at each node void KrakenReportDFS(uint32_t taxid, ofstream &ofs, bool report_zeros, bool report_kmer_data, Taxonomy &taxonomy, taxon_counters_t &clade_counters, taxon_counters_t &call_counters, uint64_t total_seqs, char rank_code, int rank_depth, int depth) { // Clade count of 0 means all subtree nodes have clade count of 0 if (! report_zeros && clade_counters[taxid].readCount() == 0) return; TaxonomyNode node = taxonomy.nodes()[taxid]; string rank = taxonomy.rank_data() + node.rank_offset; if (rank == "superkingdom") { rank_code = 'D'; rank_depth = 0; } else if (rank == "kingdom") { rank_code = 'K'; rank_depth = 0; } else if (rank == "phylum") { rank_code = 'P'; rank_depth = 0; } else if (rank == "class") { rank_code = 'C'; rank_depth = 0; } else if (rank == "order") { rank_code = 'O'; rank_depth = 0; } else if (rank == "family") { rank_code = 'F'; rank_depth = 0; } else if (rank == "genus") { rank_code = 'G'; rank_depth = 0; } else if (rank == "species") { rank_code = 'S'; rank_depth = 0; } else { rank_depth++; } string rank_str(&rank_code, 0, 1); if (rank_depth != 0) rank_str += std::to_string(rank_depth); string name = taxonomy.name_data() + node.name_offset; PrintKrakenStyleReportLine(ofs, report_kmer_data, total_seqs, clade_counters[taxid], call_counters[taxid], rank_str, node.external_id, name, depth); auto child_count = node.child_count; if (child_count != 0) { vector children(child_count); for (auto i = 0u; i < child_count; i++) { children[i] = node.first_child + i; } // Sorting child IDs by descending order of clade read counts std::sort(children.begin(), children.end(), [&](const uint64_t &a, const uint64_t &b) { return clade_counters[a].readCount() > clade_counters[b].readCount(); } ); for (auto child : children) { KrakenReportDFS(child, ofs, report_zeros, report_kmer_data, taxonomy, clade_counters, call_counters, total_seqs, rank_code, rank_depth, depth + 1); } } } void ReportKrakenStyle(string filename, bool report_zeros, bool report_kmer_data, Taxonomy &taxonomy, taxon_counters_t &call_counters, uint64_t total_seqs, uint64_t total_unclassified) { taxon_counters_t clade_counters = GetCladeCounters(taxonomy, call_counters); ofstream ofs(filename); // Special handling of the unclassified sequences if (total_unclassified != 0 || report_zeros) { READCOUNTER rc(total_unclassified, 0); PrintKrakenStyleReportLine(ofs, report_kmer_data, total_seqs, rc, rc, "U", 0, "unclassified", 0); } // DFS through the taxonomy, printing nodes as encountered KrakenReportDFS(1, ofs, report_zeros, report_kmer_data, taxonomy, clade_counters, call_counters, total_seqs, 'R', -1, 0); } } // end namespace kraken2-2.1.2/src/reports.h000066400000000000000000000033201404614725400155140ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #ifndef KRAKEN2_REPORTS_H_ #define KRAKEN2_REPORTS_H_ #include #include #include "kraken2_headers.h" #include "taxonomy.h" #include "kraken2_data.h" #include "readcounts.h" namespace kraken2 { // Still TODO: Create an MPA-style reporter that can take a std::vector of // call_counts and a std::vector of sample IDs void ReportMpaStyle(std::string filename, bool report_zeros, Taxonomy &tax, taxon_counters_t &call_counters); taxon_counts_t GetCladeCounts(Taxonomy &tax, taxon_counts_t &call_counts); taxon_counters_t GetCladeCounters(Taxonomy &tax, taxon_counters_t &call_counters); void PrintMpaStyleReportLine(std::ofstream &ofs, uint64_t clade_count, const std::string &taxonomy_line); void MpaReportDFS(taxid_t taxid, std::ofstream &ofs, bool report_zeros, Taxonomy &taxonomy, taxon_counts_t &clade_counts, std::vector &taxonomy_names); void PrintKrakenStyleReportLine(std::ofstream &ofs, bool report_kmer_data, uint64_t total_seqs, READCOUNTER clade_counter, READCOUNTER taxon_counter, const std::string &rank_str, uint32_t taxid, const std::string &sci_name, int depth); void KrakenReportDFS(uint32_t taxid, std::ofstream &ofs, bool report_zeros, bool report_kmer_data, Taxonomy &taxonomy, taxon_counters_t &clade_counters, taxon_counters_t &call_counters, uint64_t total_seqs, char rank_code, int rank_depth, int depth); void ReportKrakenStyle(std::string filename, bool report_zeros, bool report_kmer_data, Taxonomy &taxonomy, taxon_counters_t &call_counters, uint64_t total_seqs, uint64_t total_unclassified); } #endif kraken2-2.1.2/src/seqreader.cc000066400000000000000000000124551404614725400161400ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #include "seqreader.h" using std::string; namespace kraken2 { void StripString(string &str) { while (isspace(str.back())) str.pop_back(); } string &Sequence::to_string() { str_representation.assign(header); switch (format) { case FORMAT_FASTQ: str_representation.append("\n"); str_representation.append(seq); str_representation.append("\n+\n"); str_representation.append(quals); str_representation.append("\n"); break; default: str_representation.append("\n"); str_representation.append(seq); str_representation.append("\n"); break; } return str_representation; } BatchSequenceReader::BatchSequenceReader() { file_format_ = FORMAT_AUTO_DETECT; str_buffer_.reserve(8192); block_buffer_ = new char[8192]; block_buffer_size_ = 8192; } BatchSequenceReader::~BatchSequenceReader() { delete[] block_buffer_; } bool BatchSequenceReader::LoadBlock(std::istream &ifs, size_t block_size) { ss_.clear(); ss_.str(""); if (block_buffer_size_ < block_size) { delete[] block_buffer_; block_buffer_ = new char[block_size]; block_buffer_size_ = block_size; } ifs.read(block_buffer_, block_size); if (! ifs && ifs.gcount() <= 0) return false; if (file_format_ == FORMAT_AUTO_DETECT) { switch (block_buffer_[0]) { case '@' : file_format_ = FORMAT_FASTQ; break; case '>' : file_format_ = FORMAT_FASTA; break; default: errx(EX_DATAERR, "sequence reader - unrecognized file format"); } } str_buffer_.assign(block_buffer_, ifs.gcount()); ss_ << str_buffer_; if (getline(ifs, str_buffer_)) ss_ << str_buffer_ << "\n"; if (file_format_ == FORMAT_FASTQ) { while (getline(ifs, str_buffer_)) { ss_ << str_buffer_ << "\n"; if (str_buffer_[0] == '@') break; } int lines_to_read = 0; if (getline(ifs, str_buffer_)) { ss_ << str_buffer_ << "\n"; lines_to_read = str_buffer_[0] == '@' ? 3 : 2; while (lines_to_read-- > 0 && getline(ifs, str_buffer_)) ss_ << str_buffer_ << "\n"; } } else { while (ifs) { if (ifs.peek() == '>') break; if (getline(ifs, str_buffer_)) ss_ << str_buffer_ << "\n"; } } return true; } bool BatchSequenceReader::LoadBatch(std::istream &ifs, size_t record_count) { ss_.clear(); ss_.str(""); auto valid = false; if (file_format_ == FORMAT_AUTO_DETECT) { if (! ifs) return false; switch (ifs.peek()) { case '@' : file_format_ = FORMAT_FASTQ; break; case '>' : file_format_ = FORMAT_FASTA; break; case EOF : return false; default: errx(EX_DATAERR, "sequence reader - unrecognized file format"); } valid = true; } size_t line_count = 0; while (record_count > 0 && ifs) { if (getline(ifs, str_buffer_)) line_count++; valid = true; if (file_format_ == FORMAT_FASTQ) { if (line_count % 4 == 0) record_count--; } else { if (ifs.peek() == '>') record_count--; } ss_ << str_buffer_ << "\n"; } return valid; } bool BatchSequenceReader::NextSequence(Sequence &seq) { return BatchSequenceReader::ReadNextSequence (ss_, seq, str_buffer_, file_format_); } bool BatchSequenceReader::ReadNextSequence(std::istream &is, Sequence &seq, std::string &str_buffer, SequenceFormat file_format) { if (! getline(is, str_buffer)) return false; StripString(str_buffer); if (file_format == FORMAT_AUTO_DETECT) { switch (str_buffer[0]) { case '@' : file_format = FORMAT_FASTQ; break; case '>' : file_format = FORMAT_FASTA; break; default: errx(EX_DATAERR, "sequence reader - unrecognized file format"); } } seq.format = file_format; if (seq.format == FORMAT_FASTQ) { if (str_buffer.empty()) // Allow empty line to end file return false; if (str_buffer[0] != '@') errx(EX_DATAERR, "malformed FASTQ file (exp. '@', saw \"%s\"), aborting", str_buffer.c_str()); } else if (seq.format == FORMAT_FASTA) { if (str_buffer[0] != '>') errx(EX_DATAERR, "malformed FASTA file (exp. '>', saw \"%s\"), aborting", str_buffer.c_str()); } else errx(EX_SOFTWARE, "illegal sequence format encountered in parsing"); seq.header.assign(str_buffer); auto first_whitespace_ch = str_buffer.find_first_of(" \t\r", 1); auto substr_len = first_whitespace_ch; if (substr_len != std::string::npos) substr_len--; if (str_buffer.size() > 1) seq.id.assign(str_buffer, 1, substr_len); else return false; if (seq.format == FORMAT_FASTQ) { if (! getline(is, str_buffer)) return false; StripString(str_buffer); seq.seq.assign(str_buffer); if (! getline(is, str_buffer)) // + line, discard return false; if (! getline(is, str_buffer)) return false; StripString(str_buffer); seq.quals.assign(str_buffer); } else if (seq.format == FORMAT_FASTA) { seq.quals.assign(""); seq.seq.assign(""); while (is && is.peek() != '>') { if (! getline(is, str_buffer)) return ! seq.seq.empty(); StripString(str_buffer); seq.seq.append(str_buffer); } } return true; } } // end namespace kraken2-2.1.2/src/seqreader.h000066400000000000000000000026751404614725400160050ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #ifndef KRAKEN2_SEQREADER_H_ #define KRAKEN2_SEQREADER_H_ #include "kraken2_headers.h" namespace kraken2 { enum SequenceFormat { FORMAT_AUTO_DETECT, FORMAT_FASTA, FORMAT_FASTQ }; struct Sequence { SequenceFormat format; std::string header; // header line, including @/>, but not newline std::string id; // from first char. after @/> up to first whitespace std::string seq; std::string quals; // only meaningful for FASTQ seqs std::string &to_string(); private: std::string str_representation; }; class BatchSequenceReader { public: BatchSequenceReader(); ~BatchSequenceReader(); BatchSequenceReader(const BatchSequenceReader &rhs) = delete; BatchSequenceReader& operator=(const BatchSequenceReader &rhs) = delete; bool LoadBatch(std::istream &ifs, size_t record_count); bool LoadBlock(std::istream &ifs, size_t block_size); bool NextSequence(Sequence &seq); static bool ReadNextSequence(std::istream &is, Sequence &seq, std::string &str_buffer_ptr, SequenceFormat format = FORMAT_AUTO_DETECT); SequenceFormat file_format() { return file_format_; } private: std::stringstream ss_; std::string str_buffer_; // used to prevent realloc upon every load/parse SequenceFormat file_format_; char *block_buffer_; size_t block_buffer_size_; }; } #endif kraken2-2.1.2/src/taxonomy.cc000066400000000000000000000221231404614725400160340ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #include "taxonomy.h" using std::string; using std::map; using std::set; using std::ifstream; using std::istringstream; using std::queue; using std::ofstream; namespace kraken2 { NCBITaxonomy::NCBITaxonomy(string nodes_filename, string names_filename) { ifstream nodes_file(nodes_filename.c_str()); if (! nodes_file.good()) err(EX_NOINPUT, "error opening %s", nodes_filename.c_str()); ifstream names_file(names_filename.c_str()); if (! names_file.good()) err(EX_NOINPUT, "error opening %s", names_filename.c_str()); string line; uint64_t node_id, parent_id; string name, rank, junk; const string delim = "\t|\t"; while (getline(nodes_file, line)) { line.pop_back(); // discard trailing line.pop_back(); // "\t|" size_t pos1, pos2; pos1 = 0; int field_ct = 0; bool finished = false; while (field_ct++ < 10 && ! finished) { // tokenizing loop pos2 = line.find(delim, pos1); string token; if (pos2 == string::npos) { token = line.substr(pos1); finished = true; } else { token = line.substr(pos1, pos2 - pos1); pos1 = pos2 + delim.size(); } switch (field_ct) { // 1-based counting case 1 : // node ID node_id = (uint64_t) stoul(token); if (node_id == 0) errx(EX_DATAERR, "attempt to create taxonomy w/ node ID == 0"); break; case 2 : // parent ID parent_id = (uint64_t) stoul(token); break; case 3 : // rank rank = token; finished = true; break; } } // end tokenizing loop if (node_id == 1) parent_id = 0; parent_map_[node_id] = parent_id; child_map_[parent_id].insert(node_id); rank_map_[node_id] = rank; known_ranks_.insert(rank); } while (getline(names_file, line)) { line.pop_back(); // discard trailing line.pop_back(); // "\t|" size_t pos1, pos2; pos1 = 0; int field_ct = 0; bool finished = false; while (field_ct++ < 10 && ! finished) { // tokenizing loop pos2 = line.find(delim, pos1); string token; if (pos2 == string::npos) { token = line.substr(pos1); finished = true; } else { token = line.substr(pos1, pos2 - pos1); pos1 = pos2 + delim.size(); } switch (field_ct) { // 1-based counting case 1 : // node ID node_id = (uint64_t) stoul(token); if (node_id == 0) errx(EX_DATAERR, "attempt to create taxonomy w/ node ID == 0"); break; case 2 : // name name = token; break; case 4 : // type of name - determines whether or not to record in map if (token == "scientific name") name_map_[node_id] = name; finished = true; break; } } // end tokenizing loop } marked_nodes_.insert(1); // mark root node } // Mark the given taxonomy node and all its unmarked ancestors void NCBITaxonomy::MarkNode(uint64_t taxid) { while (marked_nodes_.count(taxid) == 0) { marked_nodes_.insert(taxid); taxid = parent_map_[taxid]; } } void NCBITaxonomy::ConvertToKrakenTaxonomy(const char *filename) { Taxonomy taxo; TaxonomyNode zeroes_node = { 0, 0, 0, 0, 0, 0, 0 }; taxo.node_count_ = marked_nodes_.size() + 1; // +1 because 0 is illegal value taxo.nodes_ = new TaxonomyNode[taxo.node_count_]; taxo.nodes_[0] = zeroes_node; string name_data; string rank_data; // Because so many of the node rank names are shared, we only store one copy // of each rank map rank_offsets; for (string rank : known_ranks_) { rank_offsets[rank] = rank_data.size(); rank_data.append(rank.c_str(), rank.size() + 1); } uint64_t internal_node_id = 0; map external_id_map; // keys: ext. ID, values: int. ID external_id_map[0] = 0; external_id_map[1] = 1; // 1 is root in both NCBI and Kraken taxonomies // Breadth-first search through NCBI taxonomy, assigning internal IDs // in sequential order as nodes are encountered via BFS. queue bfs_queue; bfs_queue.push(1); while (! bfs_queue.empty()) { ++internal_node_id; uint64_t external_node_id = bfs_queue.front(); bfs_queue.pop(); external_id_map[external_node_id] = internal_node_id; TaxonomyNode node = zeroes_node; // just to initialize node to zeros node.parent_id = external_id_map[ parent_map_[external_node_id] ]; node.external_id = external_node_id; node.rank_offset = rank_offsets[ rank_map_[external_node_id] ]; node.name_offset = name_data.size(); node.first_child = internal_node_id + 1 + bfs_queue.size(); for (uint64_t child_node : child_map_[external_node_id]) { // Only add marked nodes to our internal tree if (marked_nodes_.count(child_node) > 0) { bfs_queue.push(child_node); node.child_count++; } } taxo.nodes_[internal_node_id] = node; string name = name_map_[external_node_id]; name_data.append(name.c_str(), name.size() + 1); } // end BFS while loop taxo.rank_data_ = new char[ rank_data.size() ]; memcpy(taxo.rank_data_, rank_data.data(), rank_data.size()); taxo.rank_data_len_ = rank_data.size(); taxo.name_data_ = new char[ name_data.size() ]; memcpy(taxo.name_data_, name_data.data(), name_data.size()); taxo.name_data_len_ = name_data.size(); taxo.WriteToDisk(filename); } Taxonomy::Taxonomy(const string &filename, bool memory_mapping) { Init(filename.c_str(), memory_mapping); } Taxonomy::Taxonomy(const char *filename, bool memory_mapping) { Init(filename, memory_mapping); } void Taxonomy::Init(const char *filename, bool memory_mapping) { if (memory_mapping) { taxonomy_data_file_.OpenFile(filename); file_backed_ = true; char *ptr = taxonomy_data_file_.fptr(); if (strncmp(FILE_MAGIC, ptr, strlen(FILE_MAGIC)) != 0) { errx(EX_DATAERR, "attempt to load taxonomy from malformed file %s", filename); } ptr += strlen(FILE_MAGIC); memcpy((char *) &node_count_, ptr, sizeof(node_count_)); ptr += sizeof(node_count_); memcpy((char *) &name_data_len_, ptr, sizeof(name_data_len_)); ptr += sizeof(name_data_len_); memcpy((char *) &rank_data_len_, ptr, sizeof(rank_data_len_)); ptr += sizeof(rank_data_len_); nodes_ = (TaxonomyNode *) ptr; ptr += sizeof(*nodes_) * node_count_; name_data_ = ptr; ptr += name_data_len_; rank_data_ = ptr; } else { std::ifstream ifs(filename); file_backed_ = false; char magic[strlen(FILE_MAGIC) + 1]; memset(magic, 0, strlen(FILE_MAGIC) + 1); ifs.read(magic, strlen(FILE_MAGIC)); if (strcmp(magic, FILE_MAGIC) != 0) errx(EX_DATAERR, "malformed taxonomy file %s", filename); ifs.read((char *) &node_count_, sizeof(node_count_)); ifs.read((char *) &name_data_len_, sizeof(name_data_len_)); ifs.read((char *) &rank_data_len_, sizeof(rank_data_len_)); nodes_ = new TaxonomyNode[node_count_]; ifs.read((char *) nodes_, sizeof(*nodes_) * node_count_); name_data_ = new char[name_data_len_]; ifs.read((char *) name_data_, name_data_len_); rank_data_ = new char[rank_data_len_]; ifs.read((char *) rank_data_, rank_data_len_); if (! ifs) errx(EX_DATAERR, "read exhausted taxonomy information in %s", filename); } } Taxonomy::~Taxonomy() { // If file backed, deleting would be... bad. if (! file_backed_) { delete[] nodes_; delete[] name_data_; delete[] rank_data_; } } // Logic here depends on higher nodes having smaller IDs // Idea: advance B tracker up tree, A is ancestor iff B tracker hits A bool Taxonomy::IsAAncestorOfB(uint64_t a, uint64_t b) const { if (! a || ! b) return false; while (b > a) { b = nodes_[b].parent_id; } return b == a; } // Logic here depends on higher nodes having smaller IDs // Idea: track two nodes, advance lower tracker up tree, trackers meet @ LCA uint64_t Taxonomy::LowestCommonAncestor(uint64_t a, uint64_t b) const { if (! a || ! b) // LCA(x,0) = LCA(0,x) = x return a ? a : b; while (a != b) { if (a > b) a = nodes_[a].parent_id; else b = nodes_[b].parent_id; } return a; } // Dump binary data to file void Taxonomy::WriteToDisk(const char *filename) const { ofstream taxo_file(filename); taxo_file.write(FILE_MAGIC, strlen(FILE_MAGIC)); taxo_file.write((char *) &node_count_, sizeof(node_count_)); taxo_file.write((char *) &name_data_len_, sizeof(name_data_len_)); taxo_file.write((char *) &rank_data_len_, sizeof(rank_data_len_)); taxo_file.write((char *) nodes_, sizeof(*nodes_) * node_count_); taxo_file.write(name_data_, name_data_len_); taxo_file.write(rank_data_, rank_data_len_); if (! taxo_file.good()) errx(EX_OSERR, "error writing taxonomy to %s", filename); taxo_file.close(); } void Taxonomy::GenerateExternalToInternalIDMap() { external_to_internal_id_map_[0] = 0; for (size_t i = 1; i < node_count_; i++) { external_to_internal_id_map_[nodes_[i].external_id] = i; } } } kraken2-2.1.2/src/taxonomy.h000066400000000000000000000054241404614725400157030ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #ifndef KRAKEN2_TAXONOMY_H_ #define KRAKEN2_TAXONOMY_H_ #include "mmap_file.h" #include "kraken2_headers.h" namespace kraken2 { // ID of a taxonomy node is implicit from its position in the node array struct TaxonomyNode { uint64_t parent_id; // Must be lower-numbered node uint64_t first_child; // Must be higher-numbered node uint64_t child_count; // Children of a node are in contiguous block uint64_t name_offset; // Location of name in name data super-string uint64_t rank_offset; // Location of rank in rank data super-string uint64_t external_id; // Taxonomy ID for reporting purposes (usually NCBI) uint64_t godparent_id; // Reserved for future use to enable faster traversal }; class NCBITaxonomy { public: NCBITaxonomy(std::string nodes_filename, std::string names_filename); void MarkNode(uint64_t taxid); void ConvertToKrakenTaxonomy(const char *filename); private: std::map parent_map_; std::map name_map_; std::map rank_map_; std::map > child_map_; std::set marked_nodes_; std::set known_ranks_; }; class Taxonomy { public: Taxonomy(const std::string &filename, bool memory_mapping=false); Taxonomy(const char *filename, bool memory_mapping=false); Taxonomy() : file_backed_(false), nodes_(nullptr), node_count_(0), name_data_(nullptr), name_data_len_(0), rank_data_(nullptr), rank_data_len_(0) { } ~Taxonomy(); inline const TaxonomyNode *nodes() const { return nodes_; } inline const char *name_data() const { return name_data_; } inline const char *rank_data() const { return rank_data_; } inline const size_t node_count() const { return node_count_; } bool IsAAncestorOfB(uint64_t a, uint64_t b) const; uint64_t LowestCommonAncestor(uint64_t a, uint64_t b) const; void WriteToDisk(const char *filename) const; void MoveToMemory(); void GenerateExternalToInternalIDMap(); uint64_t GetInternalID(uint64_t external_id) const { if (external_to_internal_id_map_.count(external_id) == 0) return 0; return external_to_internal_id_map_.at(external_id); } private: void Init(const char *filename, bool memory_mapping); char const * const FILE_MAGIC = "K2TAXDAT"; MMapFile taxonomy_data_file_; bool file_backed_; TaxonomyNode *nodes_; size_t node_count_; char *name_data_; size_t name_data_len_; char *rank_data_; size_t rank_data_len_; std::unordered_map external_to_internal_id_map_; friend void NCBITaxonomy::ConvertToKrakenTaxonomy(const char *filename); }; } #endif // KRAKEN_TAXONOMY_H_ kraken2-2.1.2/src/utilities.cc000066400000000000000000000022221404614725400161670ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #include "utilities.h" using std::vector; using std::string; namespace kraken2 { void ExpandSpacedSeedMask (uint64_t &spaced_seed_mask, const int bit_expansion_factor) { uint64_t new_mask = 0; uint64_t bits = 1 << bit_expansion_factor; bits--; for (int i = 64 / bit_expansion_factor - 1; i >= 0; i--) { new_mask <<= bit_expansion_factor; if ((spaced_seed_mask >> i) & 1) new_mask |= bits; } spaced_seed_mask = new_mask; } vector SplitString (const string &str, const string &delim, const size_t max_fields) { vector output; size_t pos1, pos2; pos1 = 0; size_t field_ct = 0; bool finished = false; while (field_ct++ < max_fields && ! finished) { // tokenizing loop pos2 = str.find(delim, pos1); string token; if (pos2 == string::npos) { token = str.substr(pos1); finished = true; } else { token = str.substr(pos1, pos2 - pos1); pos1 = pos2 + delim.size(); } output.push_back(token); } return output; } } kraken2-2.1.2/src/utilities.h000066400000000000000000000015351404614725400160370ustar00rootroot00000000000000/* * Copyright 2013-2021, Derrick Wood * * This file is part of the Kraken 2 taxonomic sequence classification system. */ #ifndef KRAKEN2_UTILITIES_H_ #define KRAKEN2_UTILITIES_H_ #include "kraken2_headers.h" // Functions used by 2+ programs that I couldn't think of a better place for. namespace kraken2 { // Turns a simple bitstring of 0s and 1s into one where the 1s and 0s are expanded, // e.g. 010110 expanded by a factor of 2 would become 001100111100 // Allows specification of the spaced seed to represent positions in the sequence // rather than actual bits in the internal representation void ExpandSpacedSeedMask(uint64_t &spaced_seed_mask, const int bit_expansion_factor); std::vector SplitString(const std::string &str, const std::string &delim = "\t", const size_t max_fields = (size_t) -1); } #endif