pax_global_header00006660000000000000000000000064120472407610014515gustar00rootroot0000000000000052 comment=3614edff8298e390907e6ddd7f9f031511057f7d PyNAST-1.2/000077500000000000000000000000001204724076100124355ustar00rootroot00000000000000PyNAST-1.2/ChangeLog000066400000000000000000000010511204724076100142040ustar00rootroot00000000000000PyNAST 1.2 - (9 Nov 2012) ========================= * Required PyCogent version is now 1.5.3. * If muscle is installed, required version is now 3.8.31. PyNAST 1.1 - (31 Mar 2010) ========================== * Switch from BLAST for database search to uclust for database search. BLAST is no longer available for database searching. * Switch from BLAST as the default pairwise alignment to uclust as the default pairwise aligner. * Addition of setup.py to facilitate installation. PyNAST 1.0 - (25 Jan 2010) ========================== * Initial release PyNAST-1.2/INSTALL000066400000000000000000000003641204724076100134710ustar00rootroot00000000000000Install notes are provided at http://qiime.org/pynast/install.html or in PyNAST/doc/install.rst. You should use the former if you're installing a release version of PyNAST, and the latter if you're installing the development version of PyNAST. PyNAST-1.2/README.md000066400000000000000000000026741204724076100137250ustar00rootroot00000000000000## PyNAST: Python Nearest Alignment Space Termination tool The official PyNAST source code repository. For details on PyNAST, see http://qiime.org/pynast. This documentation will refer to the latest release version of PyNAST. If you're working with a development version of PyNAST, you should refer to the documentation in PyNAST/doc/. See the [QIIME GitHub organization](https://github.com/qiime) for related software projects and data. ### Stay up-to-date on PyNAST news Subscribing to the [PyNAST blog](http://pynast.wordpress.com) is the best way to keep up-to-date on news related to PyNAST. You can subscribe via RSS or e-mail on the front page of the blog. This is a very low traffic list, with currently around one e-mail per month or less. The PyNAST blog is the primary means by which we will communicate information on bugs, new releases, and news to our users, so we highly recommend subscribing. We won't share subscriber information with anyone ever. ### Citing PyNAST If you make use of [PyNAST](http://qiime.org/pynast) in published work, please cite: **PyNAST: a flexible tool for aligning sequences to a template alignment.** J. Gregory Caporaso, Kyle Bittinger, Frederic D. Bushman, Todd Z. DeSantis, Gary L. Andersen, and Rob Knight. January 15, 2010, DOI 10.1093/bioinformatics/btp636. Bioinformatics 26: 266-267. ### Need help? For [PyNAST](http://qiime.org/pynast) support, you can contact [Greg Caporaso](mailto:gregcaporaso@gmail.com). PyNAST-1.2/doc/000077500000000000000000000000001204724076100132025ustar00rootroot00000000000000PyNAST-1.2/doc/Makefile000066400000000000000000000060661204724076100146520ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " changes to make an overview of all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: -rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/PyNAST.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/PyNAST.qhc" latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ "run these through (pdf)latex." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." PyNAST-1.2/doc/_static/000077500000000000000000000000001204724076100146305ustar00rootroot00000000000000PyNAST-1.2/doc/_static/default.css000066400000000000000000000177521204724076100170020ustar00rootroot00000000000000/** * Alternate Sphinx design * Originally created by Armin Ronacher for Werkzeug, adapted by Georg Brandl. */ body { font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', 'Verdana', sans-serif; font-size: 14px; letter-spacing: -0.01em; line-height: 150%; text-align: center; /*background-color: #AFC1C4; */ background-color: #BFD1D4; color: black; padding: 0; border: 1px solid #aaa; margin: 0px 80px 0px 80px; min-width: 740px; } a { color: #CA7900; text-decoration: none; } a:hover { color: #2491CF; } pre { font-family: 'Consolas', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace; font-size: 0.95em; letter-spacing: 0.015em; padding: 0.5em; border: 1px solid #ccc; background-color: #f8f8f8; } td.linenos pre { padding: 0.5em 0; border: 0; background-color: transparent; color: #aaa; } table.highlighttable { margin-left: 0.5em; } table.highlighttable td { padding: 0 0.5em 0 0.5em; } cite, code, tt { font-family: 'Consolas', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace; font-size: 0.95em; letter-spacing: 0.01em; } hr { border: 1px solid #abc; margin: 2em; } tt { background-color: #f2f2f2; border-bottom: 1px solid #ddd; color: #333; } tt.descname { background-color: transparent; font-weight: bold; font-size: 1.2em; border: 0; } tt.descclassname { background-color: transparent; border: 0; } tt.xref { background-color: transparent; font-weight: bold; border: 0; } a tt { background-color: transparent; font-weight: bold; border: 0; color: #CA7900; } a tt:hover { color: #2491CF; } dl { margin-bottom: 15px; } dd p { margin-top: 0px; } dd ul, dd table { margin-bottom: 10px; } dd { margin-top: 3px; margin-bottom: 10px; margin-left: 30px; } .refcount { color: #060; } dt:target, .highlight { background-color: #fbe54e; } dl.class, dl.function { border-top: 2px solid #888; } dl.method, dl.attribute { border-top: 1px solid #aaa; } dl.glossary dt { font-weight: bold; font-size: 1.1em; } pre { line-height: 120%; } pre a { color: inherit; text-decoration: underline; } .first { margin-top: 0 !important; } div.document { background-color: white; text-align: left; background-image: url(contents.png); background-repeat: repeat-x; } /* div.documentwrapper { width: 100%; } */ div.clearer { clear: both; } div.related h3 { display: none; } div.related ul { background-image: url(navigation.png); height: 2em; list-style: none; border-top: 1px solid #ddd; border-bottom: 1px solid #ddd; margin: 0; padding-left: 10px; } div.related ul li { margin: 0; padding: 0; height: 2em; float: left; } div.related ul li.right { float: right; margin-right: 5px; } div.related ul li a { margin: 0; padding: 0 5px 0 5px; line-height: 1.75em; color: #EE9816; } div.related ul li a:hover { color: #3CA8E7; } div.body { margin: 0; padding: 0.5em 20px 20px 20px; } div.bodywrapper { margin: 0 240px 0 0; border-right: 1px solid #ccc; } div.body a { text-decoration: underline; } div.sphinxsidebar { margin: 0; padding: 0.5em 15px 15px 0; width: 210px; float: right; text-align: left; /* margin-left: -100%; */ } div.sphinxsidebar h4, div.sphinxsidebar h3 { margin: 1em 0 0.5em 0; font-size: 0.9em; padding: 0.1em 0 0.1em 0.5em; color: white; border: 1px solid #86989B; background-color: #AFC1C4; } div.sphinxsidebar ul { padding-left: 1.5em; margin-top: 7px; list-style: none; padding: 0; line-height: 130%; } div.sphinxsidebar ul ul { list-style: square; margin-left: 20px; } p { margin: 0.8em 0 0.5em 0; } p.rubric { font-weight: bold; } h1 { margin: 0; padding: 0.7em 0 0.3em 0; font-size: 1.5em; color: #11557C; } h2 { margin: 1.3em 0 0.2em 0; font-size: 1.35em; padding: 0; } h3 { margin: 1em 0 -0.3em 0; font-size: 1.2em; } h1 a, h2 a, h3 a, h4 a, h5 a, h6 a { color: black!important; } h1 a.anchor, h2 a.anchor, h3 a.anchor, h4 a.anchor, h5 a.anchor, h6 a.anchor { display: none; margin: 0 0 0 0.3em; padding: 0 0.2em 0 0.2em; color: #aaa!important; } h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor, h5:hover a.anchor, h6:hover a.anchor { display: inline; } h1 a.anchor:hover, h2 a.anchor:hover, h3 a.anchor:hover, h4 a.anchor:hover, h5 a.anchor:hover, h6 a.anchor:hover { color: #777; background-color: #eee; } table { border-collapse: collapse; margin: 0 -0.5em 0 -0.5em; } table td, table th { padding: 0.2em 0.5em 0.2em 0.5em; } div.footer { background-color: #E3EFF1; color: #86989B; padding: 3px 8px 3px 0; clear: both; font-size: 0.8em; text-align: right; } div.footer a { color: #86989B; text-decoration: underline; } div.pagination { margin-top: 2em; padding-top: 0.5em; border-top: 1px solid black; text-align: center; } div.sphinxsidebar ul.toc { margin: 1em 0 1em 0; padding: 0 0 0 0.5em; list-style: none; } div.sphinxsidebar ul.toc li { margin: 0.5em 0 0.5em 0; font-size: 0.9em; line-height: 130%; } div.sphinxsidebar ul.toc li p { margin: 0; padding: 0; } div.sphinxsidebar ul.toc ul { margin: 0.2em 0 0.2em 0; padding: 0 0 0 1.8em; } div.sphinxsidebar ul.toc ul li { padding: 0; } div.admonition, div.warning { font-size: 0.9em; margin: 1em 0 0 0; border: 1px solid #86989B; background-color: #f7f7f7; } div.admonition p, div.warning p { margin: 0.5em 1em 0.5em 1em; padding: 0; } div.admonition pre, div.warning pre { margin: 0.4em 1em 0.4em 1em; } div.admonition p.admonition-title, div.warning p.admonition-title { margin: 0; padding: 0.1em 0 0.1em 0.5em; color: white; border-bottom: 1px solid #86989B; font-weight: bold; background-color: #AFC1C4; } div.warning { border: 1px solid #940000; } div.warning p.admonition-title { background-color: #CF0000; border-bottom-color: #940000; } div.admonition ul, div.admonition ol, div.warning ul, div.warning ol { margin: 0.1em 0.5em 0.5em 3em; padding: 0; } div.versioninfo { margin: 1em 0 0 0; border: 1px solid #ccc; background-color: #DDEAF0; padding: 8px; line-height: 1.3em; font-size: 0.9em; } a.headerlink { color: #c60f0f!important; font-size: 1em; margin-left: 6px; padding: 0 4px 0 4px; text-decoration: none!important; visibility: hidden; } h1:hover > a.headerlink, h2:hover > a.headerlink, h3:hover > a.headerlink, h4:hover > a.headerlink, h5:hover > a.headerlink, h6:hover > a.headerlink, dt:hover > a.headerlink { visibility: visible; } a.headerlink:hover { background-color: #ccc; color: white!important; } table.indextable td { text-align: left; vertical-align: top; } table.indextable dl, table.indextable dd { margin-top: 0; margin-bottom: 0; } table.indextable tr.pcap { height: 10px; } table.indextable tr.cap { margin-top: 10px; background-color: #f2f2f2; } img.toggler { margin-right: 3px; margin-top: 3px; cursor: pointer; } img.inheritance { border: 0px } form.pfform { margin: 10px 0 20px 0; } table.contentstable { width: 90%; } table.contentstable p.biglink { line-height: 150%; } a.biglink { font-size: 1.3em; } span.linkdescr { font-style: italic; padding-top: 5px; font-size: 90%; } ul.search { margin: 10px 0 0 20px; padding: 0; } ul.search li { padding: 5px 0 5px 20px; background-image: url(file.png); background-repeat: no-repeat; background-position: 0 7px; } ul.search li a { font-weight: bold; } ul.search li div.context { color: #888; margin: 2px 0 0 30px; text-align: left; } ul.keywordmatches li.goodmatch a { font-weight: bold; } PyNAST-1.2/doc/_static/google_feed.js000066400000000000000000000017121204724076100174260ustar00rootroot00000000000000google.load("feeds", "1"); function initialize() { var feed = new google.feeds.Feed("http://pynast.wordpress.com/feed/"); feed.load(function(result) { if (!result.error) { var container = document.getElementById("feed"); for (var i = 0; i < 5; i++) { var entry = result.feed.entries[i]; var tr =document.createElement('tr') var td =document.createElement('td') var link = document.createElement('a'); link.setAttribute('href', entry.link); var dot = document.createElement('b'); dot.setAttribute('style', 'color: #BFD1D4; font-size: 10pt'); var dottext=document.createTextNode('• ') dot.appendChild(dottext) link.appendChild(dot) var title=document.createTextNode(entry.title) link.appendChild(title) td.appendChild(link) tr.appendChild(td) container.appendChild(tr); } } }); } google.setOnLoadCallback(initialize);PyNAST-1.2/doc/_templates/000077500000000000000000000000001204724076100153375ustar00rootroot00000000000000PyNAST-1.2/doc/_templates/layout.html000066400000000000000000000032141204724076100175420ustar00rootroot00000000000000{% extends "!layout.html" %} {% block rootrellink %}
  • home
  • search 
  • {% endblock %} {% block extrahead %} {% endblock %} {% block relbar1 %}

    PyNAST: Python Nearest Alignment Space Termination tool

    {{ super() }} {% endblock %} {# put the sidebar before the body #} {% block sidebartoc %}

    PyNAST News and Announcements

    {{ super() }} {% endblock %} {% block sidebar1 %}{{ sidebar() }}{% endblock %} {% block sidebar2 %}{% endblock %} {# include the Google Analytics Tracker #} {% block footer %} {{ super() }} {% endblock %} PyNAST-1.2/doc/conf.py000066400000000000000000000142221204724076100145020ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # PyNAST documentation build configuration file, created by # sphinx-quickstart on Mon Jan 25 11:42:17 2010. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import sys, os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. #sys.path.append(os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = [] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. #source_encoding = 'utf-8' # The master toctree document. master_doc = 'index' # General information about the project. project = u'PyNAST' copyright = u'2010, Greg Caporaso' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = '1.2' # The full version, including alpha/beta/rc tags. release = '1.2' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. #language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' # Else, today_fmt is used as the format for a strftime call. #today_fmt = '%B %d, %Y' # List of documents that shouldn't be included in the build. #unused_docs = [] # List of directories, relative to source directory, that shouldn't be searched # for source files. exclude_trees = ['_build'] # The reST default role (used for this markup: `text`) to use for all documents. #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. #add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). #add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. #show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. #modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. Major themes that come with # Sphinx are currently 'default' and 'sphinxdoc'. html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. #html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". #html_title = None # A shorter title for the navigation bar. Default is the same as html_title. #html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. #html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. #html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. #html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. #html_use_smartypants = True # Custom sidebar templates, maps document names to template names. #html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} # If false, no module index is generated. #html_use_modindex = False # If false, no index is generated. html_use_index = False # If true, the index is split into individual pages for each letter. #html_split_index = False # If true, links to the reST sources are added to the pages. html_show_sourcelink = False # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. #html_use_opensearch = '' # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = '' # Output file base name for HTML help builder. htmlhelp_basename = 'PyNASTdoc' # -- Options for LaTeX output -------------------------------------------------- # The paper size ('letter' or 'a4'). #latex_paper_size = 'letter' # The font size ('10pt', '11pt' or '12pt'). #latex_font_size = '10pt' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'PyNAST.tex', u'PyNAST Documentation', u'Greg Caporaso', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. #latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # Additional stuff for the LaTeX preamble. #latex_preamble = '' # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_use_modindex = False PyNAST-1.2/doc/index.rst000066400000000000000000000067261204724076100150560ustar00rootroot00000000000000.. PyNAST documentation master file, created by sphinx-quickstart on Mon Jan 25 11:42:17 2010. Downloading PyNAST: Latest stable release ========================================= You can download the latest `stable release of PyNAST here `_ and the `PyNAST OS X GUI (still PyNAST 1.0) here `_. Downloading PyNAST: Development version ======================================= If you want access to the latest-and-greatest features of PyNAST and can tolerate some instability we recommend that you check out the latest version from GitHub. You can do that with the following command: :: git clone git://github.com/qiime/pynast.git PyNAST Installing PyNAST ================= `Notes on installing and using the PyNAST command line application. `_ `Notes on installing and using the PyNAST 1.0 Mac OS X GUI. `_ Stay up-to-date on PyNAST news ============================== Subscribing to the PyNAST blog_ is the best way to keep up-to-date on news related to PyNAST. You can subscribe via RSS or e-mail on the front page of the blog. This is a very low traffic list, with currently around one e-mail per month or less. The PyNAST blog is the primary means by which we will communicate information on bugs, new releases, and news to our users, so we highly recommend subscribing. We won't share subscriber information with anyone ever. About PyNAST ============ PyNAST_ is a reimplementation of the NAST_ sequence aligner, which has become a popular tool for adding new 16s rDNA sequences to existing 16s rDNA alignments. This reimplementation is more flexible, faster, and easier to install and maintain than the original NAST implementation. PyNAST_ is built using the PyCogent Bioinformatics Toolkit. The first versions of PyNAST (through PyNAST 1.0) were written to exactly match the results of the original NAST algorithm. Beginning with the post-PyNAST 1.0 development code, PyNAST no longer exactly matches the NAST output but is instead focused on getting better alignments. Users who wish to exactly match the results of NAST should download PyNAST 1.0. Given a set of sequences and a template alignment, PyNAST_ will align the input sequences against the template alignment, and return a multiple sequence alignment which contains the same number of positions (or columns) as the template alignment. This facilitates the analysis of new sequences in the context of existing alignments, and additional data derived from existing alignments such as phylogenetic trees. Because any protein or nucleic acid sequences and template alignments can be provided, PyNAST_ is not limited to the analysis of 16s rDNA sequences. PyNAST_ is presented in an open access `Bioinformatics Applications Note `_. Citing PyNAST ============= If you make use of PyNAST_ in published work, please cite: **PyNAST: a flexible tool for aligning sequences to a template alignment.** J. Gregory Caporaso, Kyle Bittinger, Frederic D. Bushman, Todd Z. DeSantis, Gary L. Andersen, and Rob Knight. January 15, 2010, DOI 10.1093/bioinformatics/btp636. Bioinformatics 26: 266-267. Need help? ========== For PyNAST_ support, you can contact `Greg Caporaso `_. .. _PyNAST: http://qiime.org/pynast .. _blog: http://pynast.wordpress.com .. _NAST: http://nar.oxfordjournals.org/cgi/content/full/34/suppl_2/W394 PyNAST-1.2/doc/install.rst000066400000000000000000000146301204724076100154060ustar00rootroot00000000000000.. install_: ******************************************************** Installing and using the PyNAST command line application ******************************************************** Downloading PyNAST ================== You can download the latest stable release of PyNAST `here `_. You can download the latest development version of PyNAST here with the following command: :: git clone git://github.com/qiime/pynast.git PyNAST Required software ================= PyNAST_ is built on the PyCogent_ package, and uses uclust_. You must have PyCogent `1.5.3 `_ and uclust `v1.1.579 `_ or uclust `v1.2.22q `_ installed to run PyNAST_. You should first obtain these software packages, and install them according to the instructions provided by their authors. Optional software ================= If you'd like to perform pairwise alignments using BLAST_, MUSCLE_, MAFFT_, or ClustalW_, you must have those programs installed on your machine and in your system path. Currently tested versions are BLAST_ 2.2.22, MUSCLE_ v3.8.31, MAFFT v6.602b (**MAFFT v6.925b is known to NOT work with PyNAST**), and ClustalW 1.81 or 1.83. Note that PyNAST makes use of the legacy BLAST software, not BLAST+. Installation steps ================== #. Download PyCogent_ 1.5.3 (`src `_) and its dependencies, Python_ 2.6 or greater (but less than Python 3.0) and NumPy 1.3.0 or greater. PyNAST was tested with Python 2.7.1 and 2.7.2 and NumPy 1.5.1, though other versions may work as well. #. Download and install uclust_. Binaries are available, or you can install from source (`uclust v1.1.579 binaries and src `_ or `uclust v1.2.22q binaries and src `_). #. From your command terminal on an OS X or Linux system, change to the directory where you wish to install PyNAST_. You can either download `PyNAST 1.2 from here `_, or if you want the latest development version you can checkout the latest version of PyNAST_ from the GitHub repository with the command: :: git clone git://github.com/qiime/pynast.git PyNAST If you downloaded from GitHub, you will have a new folder in the current working directory called ``PyNAST``. If you downloaded PyNAST-1.2, after untar/unzipping ``PyNAST-1.2.tar.gz``, you will have a new directory named ``PyNAST-1.2``. **For consistency, all instructions below will refer to this directory as** ``PyNAST``. You may choose to rename ``PyNAST-1.2`` as ``PyNAST``. #. Run setup.py. You may need to do this as root (see :ref:`customizing_your_installation` below if this is not an option, or if you'd like to install the PyNAST library code and/or scripts in non-default locations): :: cd PyNAST python setup.py install #. Change to the PyNAST/tests directory: :: cd tests #. Run the test suite with the following command. All tests should pass, unless you don't have BLAST_, MUSCLE_, MAFFT_, and/or ClustalW_ installed. These are optional external software packages, and you will get one test failure per missing software package. You can ignore test failures which indicate that these programs cannot be found. :: python all_tests.py #. If all tests pass, you can get the usage information for the command line version of PyNAST_ with the following command anywhere on your system: :: cd pynast -h .. _customizing_your_installation: Customizing your installation ============================= PyNAST consists of library code and a script. By default the script will be installed in ``/usr/local/bin``. This can be customized with the ``--install_scripts`` option: :: python setup.py install --install-scripts=/home/pynast_user/bin/ You can similarly install the library code in an alternate location using the ``--install-purelib`` option: :: python setup.py install --install-purelib=/home/pynast_user/lib/ A combination of these options is also possible: :: python setup.py install --install-scripts=/home/pynast_user/bin/ --install-purelib=/home/pynast_user/lib/ For a complete discussion of customizations related to the setup.py script, `see this page `_. If you specify an alternate directory for ``--install-purelib``, you'll need to ensure that python knows where to look for the pynast module. Following the example above, you would do this with the following commands: :: echo "export PYTHONPATH=/home/pynast_user/lib/:$PYTHONPATH" >> /home/pynast_user/.bashrc source /home/pynast_user/.bashrc Similarly, if you specify an alternate directory for ``--install-scripts``, you'll need to ensure that the shell knows where to look for executable files. Following the example above, you would do this with the following commands: :: echo "export PATH=/home/pynast_user/bin/:$PATH" >> /home/pynast_user/.bashrc source /home/pynast_user/.bashrc Using the PyNAST command line application ========================================= After installing the PyNAST_ software as described above, you should download the sample candidate sequences and template alignment. You can then apply the PyNAST_ command line tool as follows: :: pynast -i candidate_seqs_sample.fasta -t template_sample.fasta This will result in three files being written to the current working directory: :file:`candidate_seqs_sample_pynast_aligned.fasta`, :file:`candidate_seqs_sample_pynast_log.txt`, and :file:`candidate_seqs_sample_pynast_fail.fasta`, which correspond to the alignment, the run log, and the list of sequences which failed to align, respectively. To get usage information for the PyNAST_ command line application run: :: pynast -h .. _PyCogent: http://pycogent.sourceforge.net .. _Python: http://www.python.org .. _NumPy: http://numpy.scipy.org/ .. _MUSCLE: http://www.drive5.com/muscle/ .. _PyNAST: http://qiime.org/pynast .. _ClustalW: http://www.ebi.ac.uk/Tools/clustalw2/index.html .. _BLAST: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/LATEST/ .. _MAFFT: http://align.bmr.kyushu-u.ac.jp/mafft/online/server/ .. _uclust: http://www.drive5.com/uclust/ PyNAST-1.2/doc/install_gui.rst000066400000000000000000000046261204724076100162560ustar00rootroot00000000000000.. Install GUI ************************************* Installing and using the Mac OS X GUI ************************************* Download the draft version of the `PyNAST OS X GUI here `_. Unzip the downloaded file to extract the PyNAST_ application. Depending on your system settings, the PyNAST_ application will either be called PyNAST or PyNAST.app. Ensure that your system meets the requirements listed below. If all requirements are met, double-click on the PyNAST_ application to launch PyNAST_. Note that YOU DO NOT NEED PyCogent_ or the PyNAST_ API/command line interface installed to use the PyNAST_ GUI. Requirements for the PyNAST GUI =============================== * An Intel Mac running OS X 10.5 (Leopard). * Python_ 2.5 or greater (but less than Python 3.0) and NumPy_ 1.3.0 or greater. * ``blastall``, ``formatdb``, and ``bl2seq`` installed in ``/usr/bin/``, ``/usr/local/bin/``, or ``$HOME/bin``. These are all part of NCBI's 'legacy' BLAST_ package, *NOT the BLAST+ package*. Versions 2.2.16 through 2.2.21 have been tested extensively with PyNAST_, but other versions should work. (Due to current limitations of the PyNAST_ GUI you need to have the required external software installed in one of these specific locations on your system.) Optional for the PyNAST GUI =========================== * MUSCLE_ installed in ``/usr/bin/``, ``/usr/local/bin/``, or ``$HOME/bin`` if you want to use that for pairwise aligning. * ClustalW_ installed in ``/usr/bin/``, ``/usr/local/bin/``, or ``$HOME/bin`` if you want to use that for pairwise aligning. Limitations in the draft release of the PyNAST GUI ================================================== * Not all pairwise aligners are available. Missing options are pair_hmm and MAFFT. * Users must place external executables in specific locations for PyNAST_ to find them, rather than PyNAST_ looking in user-defined locations. This will be addressed by adding a preferences box where users can define where these executables are stored. * No help text within the application. .. _PyCogent: http://pycogent.sourceforge.net .. _Python: http://www.python.org .. _NumPy: http://numpy.scipy.org/ .. _MUSCLE: http://www.drive5.com/muscle/ .. _PyNAST: http://qiime.org/pynast .. _ClustalW: http://www.ebi.ac.uk/Tools/clustalw2/index.html .. _BLAST: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/LATEST/ PyNAST-1.2/pynast/000077500000000000000000000000001204724076100137535ustar00rootroot00000000000000PyNAST-1.2/pynast/__init__.py000066400000000000000000000005351204724076100160670ustar00rootroot00000000000000#!/usr/bin/env python __author__ = "The PyNAST Development Team" __copyright__ = "Copyright 2010, The QIIME Project" __credits__ = ["Greg Caporaso", "Kyle Bittinger", "Rob Knight"] __license__ = "GPL" __version__ = "1.2" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Development" __all__ = ['logger','util'] PyNAST-1.2/pynast/logger.py000066400000000000000000000023551204724076100156110ustar00rootroot00000000000000#!/usr/bin/env python import logging __author__ = "Kyle Bittinger" __copyright__ = "Copyright 2010, The PyNAST Project" __credits__ = ["Greg Caporaso", "Kyle Bittinger"] __license__ = "GPL" __version__ = "1.2" __maintainer__ = "Kyle Bittinger" __email__ = "kylebittinger@gmail.com" __status__ = "Development" class NastLogger: __LABELS = [ "candidate sequence ID", "candidate nucleotide count", "errors", "template ID", "BLAST percent identity to template", "candidate nucleotide count post-NAST", ] def __init__(self, filename=None): self.Filename = filename self.__logger = self.__init_logger() self.record(*self.__LABELS) def __init_logger(self): if self.Filename is not None: handler = logging.FileHandler(self.Filename, mode='w') else: class NullHandler(logging.Handler): def emit(self, record): pass handler = NullHandler() logger = logging.getLogger("PyNAST logger") logger.addHandler(handler) logger.setLevel(logging.INFO) return logger def record(self, *args): log_entry = '\t'.join(map(str, args)) self.__logger.info(log_entry) PyNAST-1.2/pynast/util.py000077500000000000000000000750061204724076100153150ustar00rootroot00000000000000#!/usr/bin/env python from __future__ import division from os import system, remove, popen from os.path import exists from shutil import copy as copy_file from glob import glob from cogent import DNA, LoadSeqs, Sequence from cogent.util.misc import remove_files from cogent.core.alignment import SequenceCollection, DenseAlignment from cogent.align.align import make_dna_scoring_dict, global_pairwise from cogent.app.blast import blastn from cogent.app.formatdb import build_blast_db_from_seqs, \ build_blast_db_from_fasta_path from cogent.app.muscle_v38 import align_unaligned_seqs as muscle_align_unaligned_seqs from cogent.app.mafft import align_unaligned_seqs as mafft_align_unaligned_seqs from cogent.app.clustalw import align_unaligned_seqs as clustal_align_unaligned_seqs from cogent.app.util import get_tmp_filename from cogent.app.uclust import uclust_search_and_align_from_fasta_filepath from cogent.parse.blast import BlastResult from cogent.parse.fasta import MinimalFastaParser from pynast.logger import NastLogger __author__ = "Greg Caporaso" __copyright__ = "Copyright 2010, The PyNAST Project" __credits__ = ["Greg Caporaso", "Kyle Bittinger", "Jai Ram Rideout"] __license__ = "GPL" __version__ = "1.2" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Development" """ PyNAST is a complete rewrite of the NAST algorithm written in python. While PyNAST 1.0 strived to exactly match the results of the original NAST algorithm, the later version (beginning with the post-1.0 development code) no longer exactly matches the the original NAST algorithm, hopefully in favor of better results. PyNAST depends on PyCogent, NumPy, Python, and uclust. The versions used for development are: PyCogent 1.5.3 NumPy 1.5.1 Python 2.7.1 uclust 1.1.579 The PyNAST algorithm works as follows: (1) Using uclust, identify the closest match to a sequence in a template alignment. (2) Pairwise align the candidate sequence and template match identified in step 1 (default uses the uclust result, but users can specify an alternative pairwise aligner). (3) Reintroduce gap pattern from the template sequence. (4) Identify insertions which expand the template length. For each 'template-expanding' insertion, find the nearest gap character in the candidate sequence and remove it. (5) Return the aligned candidate sequence. """ class UnalignableSequenceError(Exception): pass def pair_hmm_align_unaligned_seqs(seqs,moltype,params={}): """ This needs to be moved to cogent.align.align """ seqs = LoadSeqs(data=seqs,moltype=moltype,aligned=False) try: s1, s2 = seqs.values() except ValueError: raise ValueError,\ "Pairwise aligning of seqs requires exactly two seqs." try: gap_open = params['gap_open'] except KeyError: gap_open = 5 try: gap_extend = params['gap_extend'] except KeyError: gap_extend = 2 try: score_matrix = params['score_matrix'] except KeyError: score_matrix = make_dna_scoring_dict(\ match=1,transition=-1,transversion=-1) return global_pairwise(s1,s2,score_matrix,gap_open,gap_extend) def blast_align_unaligned_seqs(seqs,moltype,params={}): """ Pairwise align two seqs using bl2seq This needs to be moved to the blast application controller. """ seqs = dict(LoadSeqs(data=seqs,moltype=moltype,aligned=False).items()) seq_ids = seqs.keys() query_id = seq_ids[0] subject_id = seq_ids[1] if len(seq_ids) != 2: raise ValueError,\ "Pairwise aligning of seqs with blast requires exactly two seqs." in_filepath1 = get_tmp_filename(tmp_dir='/tmp/',\ prefix='bl2seq_input1_',suffix='.fasta') in_filepath2 = get_tmp_filename(tmp_dir='/tmp/',\ prefix='bl2seq_input2_',suffix='.fasta') in_filepaths = [in_filepath1,in_filepath2] out_filepath = get_tmp_filename(tmp_dir='/tmp/',\ prefix='bl2seq_output_',suffix='.fasta') for n,in_filepath in zip(seq_ids,in_filepaths): f = open(in_filepath,'w') f.write('>%s\n' % n) f.write(str(seqs[n])) f.write('\n') f.close() # Note: -S 1 indicated that we don't want to blast both orientations -- at # this would be different behavior than other pairwise aligners. bl2seq_res = system('bl2seq -i %s -j %s -o %s -F F -S 1 -q -1 -p blastn -VT' %\ (in_filepath1,in_filepath2,out_filepath)) if bl2seq_res != 0: raise RuntimeError, "bl2seq failed:\n %s" % bl2seq_res query_seq = [] subject_seq = [] blast_res = open(out_filepath) in_result = False for line in blast_res: if line.strip().startswith('Score'): if in_result: break else: in_result = True if line.startswith('Query: '): fields = line.split() query_seq.append(fields[2].upper()) elif line.startswith('Sbjct: '): fields = line.split() subject_seq.append(fields[2].upper()) else: continue remove(in_filepath1) remove(in_filepath2) remove(out_filepath) # reintroduce terminal characters which were not aligned -- this # needs to be split out to another function to facilitate easier testing q = ''.join(query_seq) q = q.replace('-','') s = ''.join(subject_seq) s = s.replace('-','') query_in = str(seqs[query_id]) subject_in = str(seqs[subject_id]) q_start = query_in.index(q[:100]) q_end = q_start + len(q) s_start = subject_in.index(s[:100]) s_end = s_start + len(s) five_prime_bases_to_add = max(q_start,s_start) three_prime_bases_to_add = max(len(query_in)-q_end, len(subject_in)-s_end) if five_prime_bases_to_add: leading_bases = query_in[:q_start] query_seq = '%s%s%s' % \ ('-'*(five_prime_bases_to_add-len(leading_bases)),\ leading_bases, ''.join(query_seq)) leading_bases = subject_in[:s_start] subject_seq = '%s%s%s' % \ ('-'*(five_prime_bases_to_add-len(leading_bases)),\ leading_bases,\ ''.join(subject_seq)) if three_prime_bases_to_add: trailing_bases = query_in[q_end:] query_seq = '%s%s%s' %\ (''.join(query_seq),\ trailing_bases,\ '-'*(three_prime_bases_to_add-len(trailing_bases))) trailing_bases = subject_in[s_end:] subject_seq = '%s%s%s' %\ (''.join(subject_seq),\ trailing_bases,\ '-'*(three_prime_bases_to_add-len(trailing_bases))) result = [(query_id,query_seq),\ (subject_id,subject_seq)] return LoadSeqs(data=result,moltype=moltype) def align_two_seqs(template, candidate, align_unaligned_seqs_f=muscle_align_unaligned_seqs, params={},moltype=DNA): """ Align the two sequences with an arbitrary aligner function template: the template sequence to align (string) candidate: the candidate sequence to align (string) align_unaligned_seqs_f: function to be applied to aligned the candidate and template sequences -- function must be of the form align_unaligned_seqs_f(seqs,moltype,params=params) params: params to be passed to align_unaligned_seqs moltype: moltype to be passed to align_unaligned_seqs """ # Load the sequences into a form useful to align_unaligned_seq_f seqs = [('template',str(template)), ('candidate',str(candidate))] # Align the sequences aln = align_unaligned_seqs_f(seqs,moltype,params=params) # Extract the sequences from the alignment object and return them return aln.getGappedSeq('template'), aln.getGappedSeq('candidate') def reintroduce_template_spacing(template, pw_aligned_template,pw_aligned_candidate): """ reintroduce template gap spacing into pairwise aligned sequences """ # Check for the simple case where the alignment reproduced the # template spacing if template == pw_aligned_template: return (pw_aligned_template, pw_aligned_candidate,[]) # get gap maps to help with relating the aligned template sequence # to the pairwise aligned template and candidate sequences template_seq_to_aln = template.gapMaps()[0] pw_template_seq_to_aln, pw_template_aln_to_seq = \ pw_aligned_template.gapMaps() # build a list to keep track of gaps that were introduced in # the pairwise alignment but which were not present in the template # alignment new_gaps_in_pw_alignment = [] # create variable to keep track of how many gaps have been # reintroduced so far from the template to the pw_aligned_template - # this is necessary to efficently compute new_gaps_in_pw_alignment total_reintroduced_gaps = 0 template_result = list(pw_aligned_template) candidate_result = list(pw_aligned_candidate) # begin iteration over the alignment positions for aln_curr_pos in range(len(pw_aligned_template)): try: # map the current alignment position to the # corresponding sequence (ie. ungapped) position seq_curr_pos = \ pw_template_aln_to_seq[aln_curr_pos] except KeyError: # if the current alignment position is a gap, move # on to the next alignment position continue # store the next sequence position as it is used in several places seq_next_pos = seq_curr_pos + 1 try: # Get the number of gaps between the next and current # alignment positions in the template alignment template_post_char_gaps = \ template_seq_to_aln[seq_next_pos] - \ template_seq_to_aln[seq_curr_pos] - 1 except KeyError: # at the end of the sequence break # Get the number of gaps between the next and current # alignment positions in the template sequence in the # pairwise alignment pw_template_post_char_gaps = \ pw_template_seq_to_aln[seq_next_pos] -\ aln_curr_pos - 1 # compute the difference in the number of gaps following the # current position in the two alignments addl_gaps = template_post_char_gaps - pw_template_post_char_gaps if addl_gaps > 0: # if the additional gaps is greater than zero, additional # gap characters need to be added to the pairwise alignment insertion_point = aln_curr_pos + 1 + total_reintroduced_gaps template_result[insertion_point:insertion_point] = ['-'] * addl_gaps candidate_result[insertion_point:insertion_point] = ['-'] * addl_gaps # update the tally of reintroduced gaps total_reintroduced_gaps += addl_gaps elif addl_gaps < 0: # if the additional gaps is less than zero, the pairwise # alignment introduced new gaps -- store these positions to be # dealt with later. Note that first_new_gap_pos is # adjusted by adding the number of the gap characters # reintroduced to the current point. Positions # in new_gaps_in_pw_alignment therefore refer to positions in # the alignments being returned from this function first_new_gap_pos = aln_curr_pos + total_reintroduced_gaps + 1 # add the positions of the new gaps chars to the list # of new gaps new_gaps_in_pw_alignment += \ range(first_new_gap_pos,first_new_gap_pos + (-1*addl_gaps)) else: # gap pattern is the same following the current sequence # position pass return (DNA.makeSequence(''.join(template_result)), \ DNA.makeSequence(''.join(candidate_result)),\ new_gaps_in_pw_alignment) def nearest_gap(seq,pos): """ Returns the position of the nearest gap to pos in seq """ # Catch negative sequence positions if pos < 0: raise IndexError, "Sequence positions cannot be negative: %d" % pos # If pos contains a gap, that's the closest gap if seq[pos] == '-': return pos # create a list to store the nearest gap character in the 5' and # 3' directions choices = [] # find the nearest gap 5' of pos try: gap_index = ''.join(seq[:pos]).rindex('-') distance = pos - gap_index choices.append((distance,gap_index)) except ValueError: pass # find the nearest gap 3' of pos try: gap_index = pos + ''.join(seq[pos:]).index('-') distance = gap_index - pos choices.append((distance,gap_index)) except ValueError: pass # error if there are no gaps in the sequence if not choices: raise UnalignableSequenceError,\ "Can't adjust alignment because there are too few gaps to "+\ "remove in the aligned candidate to reduce to the length of "+\ "the template alignment (i.e., candidate adds too many insertions "+\ "during pairwise alignment)." # return the gap_index of the choice with the smaller distance -- if there # is a tie, will delete the 5' gap (which is what original NAST does) return min(choices)[1] def adjust_alignment(template,candidate,new_gaps): """adjust template/candidate aln to remove gaps added by pairwise alignment This step adjusts the alignment to reduce the length back to the template alignment length by introducing local misalignments to remove gap characters that are present in the pairwise alignment but not in the template alignment. """ template_l = list(template) candidate_l = list(candidate) new_gaps.reverse() for pos in new_gaps: del template_l[pos] del candidate_l[nearest_gap(candidate_l,pos)] return (DNA.makeSequence(''.join(template_l)), \ DNA.makeSequence(''.join(candidate_l))) def introduce_terminal_gaps(template,aligned_template,aligned_candidate): """ introduce terminal gaps from template into the aligned candidate seq """ # count the 5' gaps in the original aligned template original_five_prime_gaps = 0 for c in template: if c == '-': original_five_prime_gaps +=1 else: break # count the 5' gaps already existing in the pairwise aligned template # (because we don't need to add these) aligned_template_five_prime_gaps = 0 for c in aligned_template: if c == '-': aligned_template_five_prime_gaps += 1 else: break # compute the number of 5' gaps that need to be added to get to the # original alignment length five_prime_gaps_to_add = \ original_five_prime_gaps - aligned_template_five_prime_gaps # count the 3' gaps in the original aligned template original_three_prime_gaps = 0 for c in reversed(template): if c == '-': original_three_prime_gaps +=1 else: break # count the 3' gaps already existing in the pairwise aligned template # (because we don't need to add these) aligned_template_three_prime_gaps = 0 for c in reversed(aligned_template): if c == '-': aligned_template_three_prime_gaps += 1 else: break # compute the number of 3' gaps that need to be added to get to the # original alignment length three_prime_gaps_to_add = \ original_three_prime_gaps - aligned_template_three_prime_gaps # return the sequence with the 5' and 3' gaps added return DNA.makeSequence(''.join([\ '-'*five_prime_gaps_to_add,\ str(aligned_candidate),\ '-'*three_prime_gaps_to_add]),\ Name=aligned_candidate.Name) def remove_template_terminal_gaps(candidate,template): """Remove template terminal gaps and corresponding bases in candidate """ if len(template) != len(candidate): raise ValueError, \ "Sequences must be aligned, but their "+\ "lengths aren't equal. %d != %d" % (len(candidate),len(template)) if len(template) == 0: return candidate, template degapped_candidate_len = len(candidate.degap()) candidate = DNA.makeSequence(candidate) template = DNA.makeSequence(template) template_gap_vector = template.gapVector() first_non_gap = template_gap_vector.index(False) num_three_prime_gaps = template_gap_vector[::-1].index(False) last_non_gap = len(template_gap_vector) - num_three_prime_gaps # Construct the candidate name, which will include the range of bases # from the original sequence candidate = candidate[first_non_gap:last_non_gap] template = template[first_non_gap:last_non_gap] candidate_start_pos = first_non_gap + 1 candidate_end_pos = degapped_candidate_len - num_three_prime_gaps candidate_name = candidate.Name if candidate_name.endswith('RC'): name_delimiter = ':' else: name_delimiter = ' ' candidate_name = '%s%s%d..%d' %\ (candidate_name,name_delimiter,candidate_start_pos,candidate_end_pos) return DNA.makeSequence(candidate,Name=candidate_name), template def depreciation_warning(d): if d: print "Unsupported or depreciated options "+\ "passed to pynast: %s\n" % ' '.join(d.keys()) +\ " blast_db, max_e_value, and addl_blast_params are depreciated " +\ "and will be removed in PyNAST 1.2." def pynast_seq(candidate_sequence, template_alignment, max_hits=30, min_pct=75.0, min_len=1000, align_unaligned_seqs_f=None, **kwargs): """ Apply PyNAST to a single sequence candidate_sequence a single DNA sequence object template_alignment a PyCogent alignment object containing the template alignment or a fasta filepath max_hits Maximum number of uclust hits to return min_pct minimum % identity for best database match min_len minimum length of match for alignment align_unaligned_seqs_f Function to align sequences. Must be of the form: align_unaligned_seqs(seqs, moltype, params=None) see cogent.app.muscle_v38.align_unaligned_seqs """ depreciation_warning(kwargs) class SingleSeqLogger(object): """ A simple object to store results of a single pynast run """ def setUp(self): self.Data = None def record(self,*args): self.Data = tuple(args) l = SingleSeqLogger() candidate_sequences = [(candidate_sequence.Name,str(candidate_sequence))] aligned_seq, exit_status = list(ipynast_seqs(candidate_sequences, template_alignment, max_hits=max_hits, min_pct=min_pct, min_len=min_len, align_unaligned_seqs_f=align_unaligned_seqs_f, log_fp=None, logger=l))[0] if exit_status == 0: return l.Data[3], aligned_seq else: raise UnalignableSequenceError, l.Data[2] def ipynast_seqs(candidate_sequences, template_alignment, max_hits=30, min_pct=75.0, min_len=1000, align_unaligned_seqs_f=None, log_fp=None, logger=None,**kwargs): """Iterator that yields results of pynast on candidate_sequences This function yields the sequence and exit status of the alignment step, as (sequence, exit status) tuples. Status values can be: 0 : indicates a sucessful alignment, in which case the sequence will be aligned 1 : indicates unsucessful sequence search, in which case the sequence will be unaligned 2 : indicates alignment did not meet minimum requirements, in which case the sequence will be unaligned All sequences are returned as DNA sequence objects. candidate_sequences an iterable object (e.g., a list) containing tuples of (seq_id, sequence) pairs (e.g., as returned by MinimalFastaParser) or a fasta filepath template_alignment a PyCogent alignment object containing the template alignment or a fasta filepath max_hits Maximum number of uclust hits to return min_pct minimum % identity for best database match min_len minimum length of match for alignment align_unaligned_seqs_f Function to align sequences. Must be of the form: align_unaligned_seqs(seqs, moltype, params=None) see cogent.app.muscle_v38.align_unaligned_seqs log_fp Optional path to log file logger Optional NastLogger object, takes precedence over log_fp """ depreciation_warning(kwargs) files_to_remove = [] if type(candidate_sequences) == str: # filepath provided for candidate sequences candidate_sequences = MinimalFastaParser(open(candidate_sequences)) # sequence list provided for candidate sequence -- write # the seqs to a temp file to pass to uclust. This is done in all # cases to convert the sequences to uppercase in case they're not already. # The bad handling of upper versus lower-cased sequences is a uclust issue. candidate_fasta_filepath = \ get_tmp_filename(prefix='pynast_candidate',suffix='.fasta') candidate_fasta_f = open(candidate_fasta_filepath,'w') for seq_id, seq in candidate_sequences: candidate_fasta_f.write('>%s\n%s\n' % (seq_id,str(seq).upper())) candidate_fasta_f.close() files_to_remove.append(candidate_fasta_filepath) # degap the template alignment for the sequence searching step and # write it to file template_fasta_filepath = \ get_tmp_filename(prefix='pynast_template',suffix='.fasta') template_fasta_f = open(template_fasta_filepath,'w') if type(template_alignment) == str: # the template alignment was received as a filepath try: template_alignment_f = open(template_alignment) except IOError: raise IOError,\ "Cannot open specified filepath: %s" % template_alignment # template alignment provided as filepath -- process it iteratively # to handle potentially massive template_alignments template_alignment = {} for seq_id,seq in MinimalFastaParser(template_alignment_f): template_alignment[seq_id] = seq seq = Sequence(seq=seq,moltype=DNA) template_fasta_f.write('>%s\n%s\n' % (seq_id,seq.degap())) else: # the template alignment was received as a filepath template_fasta_f.write(template_alignment.degap().toFasta()) template_fasta_f.close() files_to_remove.append(template_fasta_filepath) # Set up logging. NastLogger object takes precedence over log # file path, if both are provided. if logger is not None: logger = logger elif log_fp is not None: logger = NastLogger(log_fp) else: logger = NastLogger() min_pct /= 100. # get the alignment iterator pw_alignment_iterator = uclust_search_and_align_from_fasta_filepath( candidate_fasta_filepath, template_fasta_filepath, percent_ID=min_pct, enable_rev_strand_matching=True) try: current_result = pw_alignment_iterator.next() except StopIteration: current_result = None for seq_id, seq in MinimalFastaParser(open(candidate_fasta_filepath)): seq_len = len(seq) if '-' in seq: # clean-up temporary blast database files if any were created pw_alignment_iterator.close() remove_files(files_to_remove,error_on_missing=False) raise ValueError, "Candidate sequence contains gaps. This is not supported." try: candidate_seq_id, template_seq_id, pw_aligned_candidate,\ pw_aligned_template, pct_identity = current_result except TypeError: pass if not current_result or seq_id.split()[0] != candidate_seq_id.split()[0]: # a suitable match was not found - don't align the sequence # log the failure logger.record( seq_id, # input sequence identifier len(seq), # input sequence length "No search results.") # yield the unaligned sequence and failure code yield DNA.makeSequence(seq,Name=seq_id), 1 else: # this sequence was aligned if align_unaligned_seqs_f: # if an alternate pairwise aligner was specified, unalign # and re-align the sequences. pw_aligned_template, pw_aligned_candidate =\ align_two_seqs(pw_aligned_template.replace('-',''), pw_aligned_candidate.replace('-',''), align_unaligned_seqs_f) # Cast the pairwise alignments to DNA sequence objects pw_aligned_candidate = \ DNA.makeSequence(pw_aligned_candidate,Name=candidate_seq_id) pw_aligned_template = \ DNA.makeSequence(pw_aligned_template,Name=template_seq_id) # Remove any terminal gaps that were introduced into the template # sequence pw_aligned_candidate, pw_aligned_template = \ remove_template_terminal_gaps( pw_aligned_candidate, pw_aligned_template) candidate_seq_id = pw_aligned_candidate.Name # get the aligned template sequence from the template alignment try: template_aligned_seq = \ template_alignment.getGappedSeq(template_seq_id) except AttributeError: template_aligned_seq = \ Sequence(seq=template_alignment[template_seq_id],moltype=DNA) # reintroduce the gap spacing from the template alignment pw_aligned_template, pw_aligned_candidate, new_gaps =\ reintroduce_template_spacing(template_aligned_seq,\ pw_aligned_template,pw_aligned_candidate) # delete any new gaps that were introduced during the # pairwise alignment step pw_aligned_template, pw_aligned_candidate = adjust_alignment(\ pw_aligned_template,pw_aligned_candidate,new_gaps) # reintroduce any terminal gaps that were present in the template result = introduce_terminal_gaps(\ template_aligned_seq,pw_aligned_template,pw_aligned_candidate) unaligned_length = len(result.degap()) if unaligned_length < min_len: # alignment is too short - log this as a failure error = "Alignment does not meet minimum length "+\ "requirement for alignment (%d < %d)"\ % (seq_len,min_len) logger.record( seq_id, # input sequence identifier len(seq), # input sequence length "No search results.") # yield the unaligned sequence and failure code yield DNA.makeSequence(seq,Name=seq_id), 2 else: # log the alignment logger.record( seq_id, # input sequence identifier len(seq), # input sequence length '', # Errors template_seq_id, # best template match id '%3.2f' % pct_identity, # pct id to template unaligned_length, # post alignment sequence length ) # yield the aligned sequence and sucess code yield DNA.makeSequence(result,Name=candidate_seq_id), 0 # get the next alignment try: current_result = pw_alignment_iterator.next() except StopIteration: # end of the input fasta file indicates completion, # not end of the aligned sequences continue # clean-up temporary blast database files if any were created remove_files(files_to_remove,error_on_missing=False) def null_status_callback_f(x): """Dummy function to pass as default status_callback_f""" pass def pynast_seqs(candidate_sequences, template_alignment, max_hits=30, min_pct=75.0, min_len=1000, align_unaligned_seqs_f=None, log_fp=None, logger=None, status_callback_f=null_status_callback_f,**kwargs): """Function which runs pynast_seq on candidate_sequences. Results are returned as a tuple of lists: (aligned_sequences, failed_to_align_sequences) where all sequences are DNA sequence objects. candidate_sequences an iterable object (e.g., a list) containing tuples of (seq_id, sequence) pairs (e.g., as returned by MinimalFastaParser) or a fasta filepath template_alignment a PyCogent alignment object containing the template alignment or a fasta filepath max_hits Maximum number of uclust hits to return min_pct minimum % identity for best database match min_len minimum length of match for alignment align_unaligned_seqs_f Function to align sequences. Must be of the form: align_unaligned_seqs(seqs, moltype, params=None) see cogent.app.muscle_v38.align_unaligned_seqs log_fp Optional path to log file logger Optional NastLogger object, takes precedence over log_fp status_callback_f: Callback function to provide status updates to callers of pynast_seqs. This function must take a single parameter. """ depreciation_warning(kwargs) # create lists to keep track of the aligned candidate sequences # and the sequences which fail to align aligned = [] failed_to_align = [] pynast_iterator = ipynast_seqs( candidate_sequences, template_alignment, max_hits=max_hits, min_pct=min_pct, min_len=min_len, align_unaligned_seqs_f=align_unaligned_seqs_f, log_fp=log_fp, logger=logger) for seq, status in pynast_iterator: if status == 0: aligned.append(seq) status_callback_f(seq) else: failed_to_align.append(seq) status_callback_f(seq) return aligned, failed_to_align pairwise_alignment_methods = {\ 'muscle':muscle_align_unaligned_seqs,\ 'mafft':mafft_align_unaligned_seqs,\ 'clustal':clustal_align_unaligned_seqs,\ 'blast':blast_align_unaligned_seqs,\ 'pair_hmm':pair_hmm_align_unaligned_seqs,\ 'uclust':None} PyNAST-1.2/scripts/000077500000000000000000000000001204724076100141245ustar00rootroot00000000000000PyNAST-1.2/scripts/pynast000077500000000000000000000156321204724076100153770ustar00rootroot00000000000000#!/usr/bin/env python from optparse import OptionParser from os.path import exists, splitext from cogent import LoadSeqs, DNA from cogent.core.alignment import DenseAlignment from cogent.parse.fasta import MinimalFastaParser from pynast.util import ipynast_seqs, pairwise_alignment_methods,\ null_status_callback_f from pynast.logger import NastLogger __author__ = "Greg Caporaso" __copyright__ = "Copyright 2010, The PyNAST Project" __credits__ = ["Greg Caporaso", "Kyle Bittinger"] __license__ = "GPL" __version__ = "1.2" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Development" pynast_usage_string = """usage: %prog [options] {-i input_fp -t template_fp} [] indicates optional input (order unimportant) {} indicates required input (order unimportant) Example usage: %prog -i my_input.fasta -t my_template.fasta """ def parse_command_line_parameters(): """ Parses command line arguments """ usage = pynast_usage_string version = 'Version: %prog 0.1' parser = OptionParser(usage=usage, version=version) parser.add_option('-t','--template_fp',action='store',\ type='string',help='path to template '+\ 'alignment file [REQUIRED]') parser.add_option('-i','--input_fp',action='store',\ type='string',help='path to input '+\ 'fasta file [REQUIRED]') parser.add_option('-v','--verbose',action='store_true',\ dest='verbose',default=False,\ help='Print status and other information '+\ 'during execution [default: %default]') parser.add_option('-p','--min_pct_id',action='store',\ type='float',default=75.0,help='minimum percent sequence '+\ ' identity to consider a sequence a match [default: %default]') parser.add_option('-l','--min_len',action='store',\ type='int',default=1000,help='minimum sequence length '+\ 'to include in NAST alignment [default: %default]') parser.add_option('-m','--pairwise_alignment_method',action='store',\ type='string',default='uclust',help='method '+\ 'for performing pairwise alignment ' +\ '[default: %default]') parser.add_option('-a','--fasta_out_fp',action='store',\ type='string',dest='fasta_out_fp',help='path to store '+\ 'resulting alignment file ' +\ '[default: derived from input filepath]') parser.add_option('-g','--log_fp',action='store',\ type='string',dest='log_fp',help='path to store '+\ 'log file ' +\ '[default: derived from input filepath]') parser.add_option('-f','--failure_fp',action='store',\ type='string',dest='failure_fp',help='path to store '+\ 'file of seqs which fail to align ' +\ '[default: derived from input filepath]') parser.add_option('-e','--max_e_value', type='float',default=None, help='Depreciated. Will be removed in PyNAST 1.2') parser.add_option('-d','--blast_db',\ default=None,help='Depreciated. Will be removed in PyNAST 1.2') opts,args = parser.parse_args() if opts.max_e_value: print "Depreciation Warning: max_e_value no longer used "+\ "as database search now uses uclust. " if opts.blast_db: print "Depreciation Warning: blast_db no longer used "+\ "as database search now uses uclust. " if args: parser.error('All parameters must be passed as options.'+\ ' Offending parameter(s):\n %s' % '\n '.join(args)) required_options = ['input_fp','template_fp'] for option in required_options: if eval('opts.%s' % option) == None: parser.error('Required option --%s omitted.' % option) pairwise_alignment_methods = {}.fromkeys([\ 'muscle','mafft','clustal','pair_hmm','blast','uclust']) if opts.pairwise_alignment_method not in pairwise_alignment_methods: parser.error(\ 'Unknown pairwise alignment method. Available options are:\n %s' %\ ' '.join(pairwise_alignment_methods)) if not exists(opts.template_fp): parser.error(\ 'Template filepath does not exist:\n %s\n Pass a valid one via -t.'%\ opts.template_fp) if not exists(opts.input_fp): parser.error(\ 'Input filepath does not exist:\n %s\n Pass a valid one via -i.'%\ opts.input_fp) return opts,args def main(): opts, args = parse_command_line_parameters() verbose = opts.verbose seqs_fp = opts.input_fp min_pct_id = opts.min_pct_id min_len = opts.min_len template_fp = opts.template_fp align_unaligned_seqs_f =\ pairwise_alignment_methods[opts.pairwise_alignment_method] # If necessary, derive default locations for alignment, log, and # failure files by removing the file extension from the sequences # filepath, seqs_fp. seqs_fp_base = splitext(seqs_fp)[0] aln_fp = opts.fasta_out_fp or (seqs_fp_base + '_pynast_aligned.fasta') log_fp = opts.log_fp or (seqs_fp_base + '_pynast_log.txt') fail_fp = opts.failure_fp or (seqs_fp_base + '_pynast_fail.fasta') if verbose: print "Input file : %s" % seqs_fp print "Template alignment : %s" % template_fp print "Output alignment : %s" % aln_fp print "Log file : %s" % log_fp print "Failure file : %s" % fail_fp logger = NastLogger(log_fp) candidate_sequences = MinimalFastaParser(open(seqs_fp)) template_alignment = [] for seq_id, seq in MinimalFastaParser(open(template_fp)): # replace '.' chars with '-' chars # and lowercase chars with uppercase chars template_alignment.append((seq_id,seq.replace('.','-').upper())) try: template_alignment = LoadSeqs(data=template_alignment,moltype=DNA,\ aligned=DenseAlignment) except KeyError, e: raise KeyError,\ 'Only ACGT-. characters can be contained in template alignments.'+\ ' The offending character was: %s' % e pynast_iterator = ipynast_seqs(\ candidate_sequences,\ template_alignment,\ max_hits=30,\ min_pct=min_pct_id,\ min_len=min_len,\ align_unaligned_seqs_f=align_unaligned_seqs_f,\ logger=logger) aln_file = open(aln_fp,'w') fail_file = open(fail_fp,'w') completed_seq_count = 0 for seq, status in pynast_iterator: if status == 0: aln_file.write('>%s\n%s\n' % (seq.Name,str(seq))) else: fail_file.write('>%s\n%s\n' % (seq.Name,str(seq))) # Update completed sequence count, and print status message # when requested by user. completed_seq_count += 1 if verbose and completed_seq_count % 100 == 0: print '%d sequences completed.' % completed_seq_count aln_file.close() fail_file.close() if __name__ == "__main__": main() PyNAST-1.2/setup.py000066400000000000000000000030651204724076100141530ustar00rootroot00000000000000#!/usr/bin/env python # File created on 04 Feb 2010 from __future__ import division from distutils.core import setup import re __author__ = "Greg Caporaso" __copyright__ = "Copyright 2010, The PyNAST project" __credits__ = ["Greg Caporaso"] __license__ = "GPL" __version__ = "1.2" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Development" long_description = """The Python Nearest Alignment Space Termination tool http://qiime.org/pynast PyNAST: a flexible tool for aligning sequences to a template alignment. J. Gregory Caporaso, Kyle Bittinger, Frederic D. Bushman, Todd Z. DeSantis, Gary L. Andersen, and Rob Knight. January 15, 2010, DOI 10.1093/bioinformatics/btp636. Bioinformatics 26: 266-267. """ try: import cogent except ImportError: print "PyCogent not installed but required. (Is it installed? Is it in the current users $PYTHONPATH or site-packages?) See http://pycogent.sourceforge.net." exit(1) pycogent_version = tuple([int(v) \ for v in re.split("[^\d]", cogent.__version__) if v.isdigit()]) if pycogent_version < (1,5,3): print "PyCogent >= 1.5.3 required, but %s is installed." % cogent.__version__ exit(1) setup(name='PyNAST', version=__version__, description='The Python Nearest Alignment Space Termination tool', author=__maintainer__, author_email=__email__, maintainer=__maintainer__, maintainer_email=__email__, url='http://qiime.org/pynast', packages=['pynast'], scripts=['scripts/pynast'], long_description=long_description ) PyNAST-1.2/tests/000077500000000000000000000000001204724076100135775ustar00rootroot00000000000000PyNAST-1.2/tests/all_tests.py000077500000000000000000000070561204724076100161560ustar00rootroot00000000000000#!/usr/bin/env python """Run all tests. """ from os import walk, environ from subprocess import Popen, PIPE, STDOUT from os.path import join, abspath, dirname, split from glob import glob import re __author__ = "Rob Knight" __copyright__ = "Copyright 2010, The PyNAST Project" __credits__ = ["Rob Knight","Greg Caporaso", "Jai Ram Rideout"] __license__ = "GPL" __version__ = "1.2" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Development" def main(): pynast_dir = abspath(join(dirname(__file__),'..')) test_dir = join(pynast_dir,'tests') scripts_dir = join(pynast_dir,'scripts') unittest_good_pattern = re.compile('OK\s*$') application_not_found_pattern = re.compile('ApplicationNotFoundError') python_name = 'python' bad_tests = [] missing_application_tests = [] # Run through all of PyNAST's unit tests, and keep track of any files which # fail unit tests. unittest_names = [] for root, dirs, files in walk(test_dir): for name in files: if name.startswith('test_') and name.endswith('.py'): unittest_names.append(join(root,name)) unittest_names.sort() for unittest_name in unittest_names: print "Testing %s:\n" % unittest_name command = '%s %s -v' % (python_name, unittest_name) result = Popen(command,shell=True,universal_newlines=True,\ stdout=PIPE,stderr=STDOUT).stdout.read() print result if not unittest_good_pattern.search(result): if application_not_found_pattern.search(result): missing_application_tests.append(unittest_name) else: bad_tests.append(unittest_name) # Run through all of PyNAST's scripts, and pass -h to each one. If the # resulting stdout does not being with the Usage text, that is an # indicator of something being wrong with the script. Issues that would # cause that are bad import statements in the script, SyntaxErrors, or # other failures prior to running parse_command_line_parameters. script_names = [] script_names = glob('%s/*' % scripts_dir) script_names.sort() bad_scripts = [] for script_name in script_names: script_good_pattern = re.compile('^Usage: %s' % split(script_name)[1]) print "Testing %s." % script_name command = '%s %s -h' % (python_name, script_name) result = Popen(command,shell=True,universal_newlines=True,\ stdout=PIPE,stderr=STDOUT).stdout.read() if not script_good_pattern.search(result): bad_scripts.append(script_name) if bad_tests: print "\nFailed the following unit tests.\n%s" % '\n'.join(bad_tests) if missing_application_tests: print "\nFailed the following unit tests, in part or whole due "+\ "to missing external applications.\nDepending on the QIIME features "+\ "you plan to use, this may not be critical.\n%s"\ % '\n'.join(missing_application_tests) if bad_scripts: print "\nFailed the following script tests.\n%s" % '\n'.join(bad_scripts) # If any of the unit tests or script tests failed, or if we have any # missing application errors, use return code 1 (as python's # unittest module does) to indicate one or more failures with the test # suite. return_code = 1 if not (bad_tests or missing_application_tests or bad_scripts): print "\nAll tests passed successfully." return_code = 0 return return_code if __name__ == "__main__": exit(main()) PyNAST-1.2/tests/test_logger.py000077500000000000000000000041521204724076100164740ustar00rootroot00000000000000#!/usr/bin/env python from __future__ import division from os import remove from cogent import LoadSeqs, DNA from cogent.util.unit_test import TestCase, main from cogent.app.util import get_tmp_filename from cogent.parse.fasta import MinimalFastaParser from pynast.logger import NastLogger __author__ = "Kyle Bittinger" __copyright__ = "Copyright 2010, The PyNAST Project" __credits__ = ["Greg Caporaso", "Kyle Bittinger"] __license__ = "GPL" __version__ = "1.2" __maintainer__ = "Kyle Bittinger" __email__ = "kylebittinger@gmail.com" __status__ = "Development" class NastLoggerTests(TestCase): """Tests of the PyNAST logging class""" def setUp(self): self.filename = get_tmp_filename( prefix='NastLoggerTest', suffix='.log', ) def tearDown(self): try: remove(self.filename) except OSError: pass def test_init(self): """NastLogger.__init__ should store log filename in Filename attribute""" null_logger = NastLogger() self.assertEqual(null_logger.Filename, None) file_logger = NastLogger(self.filename) self.assertEqual(file_logger.Filename, self.filename) def test_header(self): """NastLogger.__init__ should write correct header to log file""" logger = NastLogger(self.filename) file = open(self.filename, 'r') header = file.readline() file.close() exp_header = ( 'candidate sequence ID\tcandidate nucleotide count\terrors\t' 'template ID\tBLAST percent identity to template\t' 'candidate nucleotide count post-NAST\n' ) self.assertEqual(header, exp_header) def test_record(self): """NastLogger.__init__ should record tab-separated values to log file""" logger = NastLogger(self.filename) logger.record('hello', 'world') file = open(self.filename, 'r') obs_header = file.readline() obs_message = file.readline() file.close() self.assertEqual(obs_message, 'hello\tworld\n') if __name__ == "__main__": main() PyNAST-1.2/tests/test_util.py000077500000000000000000005577421204724076100162140ustar00rootroot00000000000000#!/usr/bin/env python from __future__ import division import sys from cogent import LoadSeqs, DNA from cogent.util.misc import remove_files from cogent.core.alignment import DenseAlignment from cogent.app.util import get_tmp_filename from cogent.app.muscle_v38 import align_unaligned_seqs as muscle_align_unaligned_seqs from cogent.app.mafft import align_unaligned_seqs as mafft_align_unaligned_seqs from cogent.app.clustalw import align_unaligned_seqs as clustal_align_unaligned_seqs from cogent.parse.fasta import MinimalFastaParser from cogent.util.unit_test import TestCase, main from pynast.util import (align_two_seqs, reintroduce_template_spacing, adjust_alignment, nearest_gap, pynast_seq, introduce_terminal_gaps, UnalignableSequenceError, pynast_seqs, pair_hmm_align_unaligned_seqs, blast_align_unaligned_seqs, ipynast_seqs, remove_template_terminal_gaps) from pynast.logger import NastLogger __author__ = "Greg Caporaso" __copyright__ = "Copyright 2010, The PyNAST Project" __credits__ = ["Greg Caporaso", "Kyle Bittinger", "Jai Ram Rideout"] __license__ = "GPL" __version__ = "1.2" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Development" class PyNastTests(TestCase): """ Tests of the PyNAST functionality """ def setUp(self): """ """ self.files_to_remove = [] self.full_length_test1_input_seqs =\ LoadSeqs(data=input_seqs1_fasta,moltype=DNA,aligned=False) self.full_length_test1_input_seqs_fp = \ get_tmp_filename(prefix='PyNastTest', suffix='.fasta') self.files_to_remove.append(self.full_length_test1_input_seqs_fp) full_length_test1_input_seqs_f = \ open(self.full_length_test1_input_seqs_fp,'w') full_length_test1_input_seqs_f.write(input_seqs1_fasta) full_length_test1_input_seqs_f.close() self.full_length_test1_input_seqs_lines = input_seqs1_fasta.split('\n') self.full_length_test1_template_aln = \ LoadSeqs(data=pynast_test_template_fasta1,moltype=DNA,aligned=DenseAlignment) self.full_length_test1_template_aln_fp = \ get_tmp_filename(prefix='PyNastTest', suffix='.fasta', result_constructor=str) self.files_to_remove.append(self.full_length_test1_template_aln_fp) full_length_test1_template_aln_f = \ open(self.full_length_test1_template_aln_fp,'w') full_length_test1_template_aln_f.write( self.full_length_test1_template_aln.toFasta()) full_length_test1_template_aln_f.close() self.full_length_test1_expected_aln = \ LoadSeqs(data=input_seqs1_aligned_fasta,moltype=DNA,aligned=DenseAlignment) self.full_length_test1_expected_fail = \ LoadSeqs(data=input_seqs1_fail_fasta,moltype=DNA,aligned=False) self.full_length_test2_input_seqs =\ LoadSeqs(data=input_seqs2_fasta,moltype=DNA,aligned=False) self.full_length_test2_template_aln = \ LoadSeqs(data=pynast_test_template_fasta2,moltype=DNA,aligned=DenseAlignment) self.input_seqs_gaps = input_seqs_gaps.split('\n') self.log_filename = \ get_tmp_filename(prefix='PyNastTest', suffix='.log') self.files_to_remove.append(self.log_filename) # touch the log file, so we don't get an error trying to remove it # if a test doesn't create it open(self.log_filename,'w').close() def tearDown(self): """ Clean up temporary files created by the tests """ remove_files(self.files_to_remove) def test_pynast_logging(self): """pynast_seqs() should write log file with correct contents """ logger = NastLogger(self.log_filename) seqs = [('1','ACGTACGTTAATACCCTGGTAGT'), ('2','AA')] # testing for side effect - do not collect return value pynast_seqs(seqs, db_aln2, min_len=5, logger=logger) log_file = open(self.log_filename, 'r') header = log_file.readline() contents = log_file.read() log_file.close() self.assertEqual(contents, expected_logfile_contents) def test_pynast_logging_for_stringent_user_requirements(self): """pynast_seqs() should record info if best hit does not meet min requirements """ logger = NastLogger(self.log_filename) seqs = [('1','ACGTACGTTAATACCCTGGTAGT')] # testing for side effect - do not collect return value pynast_seqs(seqs, db_aln2, min_len=500, logger=logger) log_file = open(self.log_filename, 'r') header = log_file.readline() contents = log_file.read() log_file.close() self.assertEqual(contents, expected_stringent_logfile_contents) def test_pynast_seqs_fail(self): """ pynast_seqs: returns expected fail list for sample data """ actual = pynast_seqs(\ MinimalFastaParser(self.full_length_test1_input_seqs_lines),\ self.full_length_test1_template_aln,\ min_len=1000,min_pct=75.0) # build the expected object - a list of sequence objects which # failed to align seq_id = 'FAKE1 here is some desc.73602 tag1;tag2, tag3:tag4' expected = [\ DNA.makeSequence(self.full_length_test1_expected_fail.getSeq(seq_id),\ Name=seq_id)] self.assertEqual(actual[1],expected) def test_pynast_seqs_exact_matches(self): """ pynast_seqs: perfectly aligns several exact template matches """ template_aln = self.full_length_test1_template_aln # Build the expected result object, which is a list of # dna sequence objects where names include the aligned span expected_seqs = [] for n in template_aln.Names: expected_seqs.append(\ DNA.makeSequence(\ str(template_aln.getGappedSeq(n)),\ Name='%s 1..%d' % (n,len(template_aln.getSeq(n).degap())))) expected_aln = LoadSeqs(data=expected_seqs,\ moltype=DNA,aligned=DenseAlignment) input_seqs = self.full_length_test1_template_aln.degap() # run pynast_seqs on the input sequences actual = pynast_seqs(input_seqs.todict().items(),\ template_aln,\ min_len=1000,min_pct=75.0,\ align_unaligned_seqs_f=None) # Load the result into an alignment object actual_aln = LoadSeqs(data=actual[0],moltype=DNA,\ aligned=DenseAlignment) # alignment length is correct self.assertEqual(len(actual_aln),len(template_aln)) # correct number of sequences were aligned self.assertEqual(actual_aln.getNumSeqs(),expected_aln.getNumSeqs()) # same collection of seq ids is returned actual_names = actual_aln.Names actual_names.sort() expected_names = expected_aln.Names expected_names.sort() self.assertEqual(actual_names,expected_names) # all sequence lengths match expected sequence lengths (ie, no # missing bases) for seq_id in actual_aln.Names: self.assertEqual(\ len(actual_aln.getSeq(seq_id)),\ len(expected_aln.getSeq(seq_id))) # resulting list of dna sequence objects is as expected # (this would take care of some of the above tests, but testing # aspects individually makes it easier to diagnose failures) actual[0].sort() expected_seqs.sort() self.assertEqual(actual[0],expected_seqs) # fail list is empty self.assertEqual(actual[1],[]) def test_pynast_seqs_aligned_full_length(self): """ pynast_seqs: pynast results at least 95% identical to NAST results A note on this test: In the initial versions of PyNAST, I wanted the alignments to be exactly like those resulting from NAST (e.g., in PyNAST 1.0). I've since abandoned that, in favor of getting improved alignments. This test was modified after PyNAST 1.0, and I'm now only testing that the alignments are similar to those derived from NAST. This test may be of little use, but it is a nice test of the code on full-length sequences, so I hesitate to delete it. -Greg (24 Mar 2010) """ template_aln = self.full_length_test1_template_aln expected_aln = self.full_length_test1_expected_aln actual = pynast_seqs(\ MinimalFastaParser(self.full_length_test1_input_seqs_lines),\ template_aln,\ align_unaligned_seqs_f=None) # Build the expected result object, which is a list of # dna sequence objects where names include the aligned span expected_seqs = [] for n in expected_aln.Names: expected_seqs.append(\ DNA.makeSequence(str(expected_aln.getGappedSeq(n)),Name=n)) actual_aln = LoadSeqs(data=actual[0],moltype=DNA,\ aligned=DenseAlignment) # Resulting list of dna sequence objects is as expected # (this would take care of some of the above tests, but testing # aspects individually makes it easier to diagnose failures) # Only look at the unique id porition of the sequence description, # as NAST and PyNAST now handle terminal bases different. NAST # does local alignments, so sometimes loses terminal bases. PyNAST # does global alignments, so the candidate only lose terminal bases # if they introduce terminal gaps in the template alignments. a_list = [(a.Name.split()[0], a) for a in actual[0]] e_list = [(e.Name.split()[0], e) for e in expected_seqs] a_list.sort() e_list.sort() for a,e in zip(a_list,e_list): # first component of names are equal self.assertEqual(a[0],e[0]) a_seq = a[1] e_seq = e[1] count_same = 0 for i in range(len(a_seq)): if a_seq[i] == e_seq[i]: count_same += 1 percent_same = count_same/len(a_seq) self.assertTrue(percent_same >= 0.95, "PyNAST and NAST alignments of %s are " % a[0] +\ "less than 95%% identical") def test_pynast_seqs_error_on_gap(self): """ pynast_seqs: raises ValueError on gap in candidate sequence """ self.assertRaises(ValueError,pynast_seqs, MinimalFastaParser(self.input_seqs_gaps),\ self.full_length_test1_template_aln,\ min_len=1000,min_pct=75.0) def test_pynast_seqs_simple(self): """pynast_seqs: fns with simple test data """ candidate_seqs = [\ ('1','ACGTACGTTAATACCCTGGTAGT'),\ ('2','ACGTACGTTAATACCCTGGTAGT'),\ ('3','AA')] expected_aln = [\ DNA.makeSequence('ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---',Name='1'),\ DNA.makeSequence('ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---',Name='2')] expected_fail = [DNA.makeSequence('AA',Name='3')] actual = pynast_seqs(candidate_seqs,db_aln2,min_len=5,min_pct=75.0) self.assertEqual(actual,(expected_aln,expected_fail)) # all fail when min_len restricts matches expected_aln = [] expected_fail = [\ DNA.makeSequence('ACGTACGTTAATACCCTGGTAGT',Name='1'),\ DNA.makeSequence('ACGTACGTTAATACCCTGGTAGT',Name='2'),\ DNA.makeSequence('AA',Name='3')] actual = pynast_seqs(candidate_seqs,db_aln2,min_len=5000,min_pct=75.0) self.assertEqual(actual,(expected_aln,expected_fail)) def test_pynast_seqs_simple_alt_pairwise(self): """pynast_seqs: fns with alt pairwise aligner """ # tests that the order of the returned sequences is correct # as this is easy to screw up candidate_seqs = [('1','AGCCCCTTTT')] template_aln = LoadSeqs(data=dict([ ('2','ACCC-----CCTTTT')]),\ moltype=DNA,aligned=DenseAlignment) expected_aln = [DNA.makeSequence('AGCC-----CCTTTT',Name='1')] expected_fail = [] actual = pynast_seqs(candidate_seqs,template_aln, min_len=5,min_pct=75.0,\ align_unaligned_seqs_f=pair_hmm_align_unaligned_seqs) self.assertEqual(actual,(expected_aln,expected_fail)) # tests that the aligner was actually applied, as it's # nearly impossible to get different alignments with # different aligners on these short test sequences -- # therefore test with a fake aligner that alters the sequence def fake_aligner(seqs,moltype,params={}): return LoadSeqs(data=[('candidate','AGGGGGTTTT'), ('template', 'ACCCCCTTTT')],moltype=DNA) candidate_seqs = [('1','ACCCCCTTTT')] template_aln = LoadSeqs(data=dict([ ('2','ACCC-----CCTTTT')]),\ moltype=DNA,aligned=DenseAlignment) expected_aln = [DNA.makeSequence('AGGG-----GGTTTT',Name='1')] expected_fail = [] actual = pynast_seqs(candidate_seqs,template_aln, min_len=5,min_pct=75.0,\ align_unaligned_seqs_f=fake_aligner) self.assertEqual(actual,(expected_aln,expected_fail)) def test_ipynast_seqs_simple(self): """ipynast_seqs: fns with simple test data """ candidate_seqs = [\ ('1','ACGAACGTTAATACCCTGGAAGT'),\ ('2','ACGTACGTTAATACCCTGGTAGT'),\ ('3','AA')] expected = [\ (DNA.makeSequence(\ 'ACGAACGT-TA--ATA-C-----CC-T-G-GAA-G-T---',Name='1'),0),\ (DNA.makeSequence(\ 'ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---',Name='2'),0),\ (DNA.makeSequence('AA',Name='3'),1)] actual = list(ipynast_seqs(\ candidate_seqs,db_aln2,min_len=5,min_pct=75.0)) self.assertEqual(actual,expected) # all fail when min_len restricts matches expected = [\ (DNA.makeSequence('ACGAACGTTAATACCCTGGAAGT',Name='1'),2),\ (DNA.makeSequence('ACGTACGTTAATACCCTGGTAGT',Name='2'),2),\ (DNA.makeSequence('AA',Name='3'),1)] actual = list(ipynast_seqs(\ candidate_seqs,db_aln2,min_len=5000,min_pct=75.0)) self.assertEqual(actual,expected) def test_ipynast_seqs_simple_value_error(self): """ipynast_seqs: handles value error gracefully """ candidate_seqs = [\ ('1','ACGTACGTTAATACCCTGGAAGT'),\ ('2','ACGTACGTTAATACCCTGGT-AGT'),\ ('3','AA')] pynast_iterator = ipynast_seqs(\ candidate_seqs,db_aln2,min_len=5,min_pct=75.0) self.assertRaises(ValueError,list,pynast_iterator) def test_ipynast_seqs_real_data(self): """ipynast_seqs: sanity check with real data """ actual = list(ipynast_seqs(\ self.full_length_test2_input_seqs.items(),\ self.full_length_test2_template_aln,\ min_len=5,min_pct=75.0)) # correct number of results returned self.assertEqual(len(actual),1) actual = list(ipynast_seqs(\ self.full_length_test1_input_seqs.items(),\ self.full_length_test1_template_aln,\ min_len=5,min_pct=75.0)) # correct number of results returned self.assertEqual(len(actual),6) self.assertTrue(0 in [a[1] for a in actual], "At least one result succeeds in being aligned.") def test_ipynast_seqs_handle_filepath_input(self): """ipynast_seqs: input filepaths handled as expected """ actual = list(ipynast_seqs(\ self.full_length_test1_input_seqs.items(),\ self.full_length_test1_template_aln_fp,\ min_len=5,min_pct=75.0)) # correct number of results returned self.assertEqual(len(actual),6) self.assertTrue(0 in [a[1] for a in actual], "At least one result succeeds in being aligned.") def test_pynast_seqs_simple_status_callback(self): """pynast_seqs: status callback functions as expected """ candidate_seqs = [\ ('1','ACGTACGTTAATACCCTGGTAGT'),\ ('2','ACGTACGTTAATACCCTGGTAGT'),\ ('3','AA')] expected_aln = [\ DNA.makeSequence('ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---',Name='1'),\ DNA.makeSequence('ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---',Name='2')] expected_fail = [DNA.makeSequence('AA',Name='3')] class StatusTracker(object): completed_seqs_count = 0 def update_completed_seqs_count(self,x): self.completed_seqs_count += 1 st = StatusTracker() self.assertEqual(st.completed_seqs_count,0) results = pynast_seqs(candidate_seqs,db_aln2,min_len=5,min_pct=75.0,\ status_callback_f=st.update_completed_seqs_count) self.assertEqual(st.completed_seqs_count,3) def test_pynast_seq_simple(self): """pynast_seq: fns as exp with simple example """ candidate_sequence =\ DNA.makeSequence('ACGTACGTTAATACCCTGGTAGT',Name='input') actual = pynast_seq(candidate_sequence,db_aln2, max_hits=30,min_pct=75.0, min_len=5,align_unaligned_seqs_f=None) # check individual components of result object expected_template_hit = '5' expected_aligned_seq = 'ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---' expected_aligned_seq_id = 'input 1..23' self.assertEqual(actual[0],expected_template_hit) self.assertEqual(str(actual[1]),expected_aligned_seq) self.assertEqual(actual[1].Name,expected_aligned_seq_id) # check full result object expected = ('5',\ DNA.makeSequence('ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---',\ Name='input 1..23')) self.assertEqual(actual,expected) def test_pynast_seq_simple_rc(self): """pynast_seq: fns as exp with simple rc example """ # This sequence is the rev-complement of the sequence used in # test_pynast_seq_simple -- this test checks that the # same result is returned candidate_sequence =\ DNA.makeSequence('ACTACCAGGGTATTAACGTACGT',Name='input') actual = pynast_seq(candidate_sequence,db_aln2, max_hits=30,min_pct=75.0, min_len=5,align_unaligned_seqs_f=None) # check individual components of result object expected_template_hit = '5' expected_aligned_seq = 'ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---' expected_aligned_seq_id = 'input RC:1..23' self.assertEqual(actual[0],expected_template_hit) self.assertEqual(str(actual[1]),expected_aligned_seq) self.assertEqual(actual[1].Name,expected_aligned_seq_id) # check full result object expected = ('5',\ DNA.makeSequence('ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---',\ Name='input RC:1..23')) self.assertEqual(actual,expected) def test_pynast_seq_10116(self): """pynast_seq: real seq that introduces 5' gaps in pw aligned template The pairwise alignment of this sequence to the template alignment results in five prime gaps in the pairwise aligned template. This caused a bug in early versions of PyNAST because too many terminal gaps were being reintroduced. Therefore keeping this as a real test case, essentially of the introduce_terminal_gaps functionality. """ candidate_sequence =\ LoadSeqs(data=input_seq_10116.split('\n'),moltype=DNA).\ getSeq('10116') template_aln = self.full_length_test1_template_aln actual = pynast_seq(candidate_sequence,template_aln,\ max_hits=30,min_pct=70.0,min_len=150,\ align_unaligned_seqs_f=None) self.assertEqual(len(actual[1]),len(template_aln)) def test_pynast_seq_14990(self): """pynast_seq: aligning handles input seq longer than best template seq """ template_aln =\ LoadSeqs(data=template_14990_trimmed.split('\n'),\ moltype=DNA,aligned=DenseAlignment) candidate_sequence =\ LoadSeqs(data=input_seq_14990.split('\n'),moltype=DNA).\ getSeq('14990') expected = ('14990_5_and_3_prime_lost_four_bases_each',\ template_aln.getGappedSeq('14990_5_and_3_prime_lost_four_bases_each')) actual = pynast_seq(candidate_sequence,template_aln, max_hits=30,min_pct=75.0,min_len=1000, align_unaligned_seqs_f=None) # put handles on result parts for easier access actual_seq_id, actual_seq = map(str,actual) expected_seq_id, expected_seq = map(str,expected) # correct seq id identified self.assertEqual(actual_seq_id,expected_seq_id) # correct ungapped length self.assertEqual(len(actual_seq.replace('-','')),\ len(expected_seq.replace('-',''))) # correct gapped length self.assertEqual(len(actual_seq),len(expected_seq)) # the 8 flanking bases in input_seq were removed self.assertEqual(len(actual_seq.replace('-','')),\ len(candidate_sequence)-8) # aligned seqs are equal self.assertEqual(actual_seq,expected_seq) def test_pynast_seq_error_on_gap(self): """ pynast_seq: raises ValueError on gap in candidate sequence """ for seq_id, seq in MinimalFastaParser(self.input_seqs_gaps): # error when gap(s) in seq cs = DNA.makeSequence(seq,Name=seq_id) self.assertRaises(ValueError,pynast_seq,cs,db_aln2,\ max_hits=1,min_pct=75.0,min_len=5,align_unaligned_seqs_f=None) seq = seq.replace('-','').replace('.','') # no error when no gaps in seq cs = DNA.makeSequence(seq,Name=seq_id) r = pynast_seq(cs,db_aln2,\ max_hits=1,min_pct=70.0,min_len=5,align_unaligned_seqs_f=None) def test_align_two_seqs_with_muscle(self): """ align_two_seqs: fns for simple alignments with muscle """ # Only a few trivial cases are tested as it is not the place to # test how the aligners functions f = muscle_align_unaligned_seqs # perfect alignment s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(s1,s2)) # gap added to s2 s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATCCCTGGTAGT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('ACGTACGTACAT-CCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # gap added to s1 s1 = DNA.makeSequence('ACGTACGTACATCCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp1 = DNA.makeSequence('ACGTACGTACAT-CCCTGGTAGT') exp2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # single mismatch s1 = DNA.makeSequence('ACGTACGTACATTCCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(s1,s2)) # truncated sequence (3') s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('ACGTACGTACATACCCT------') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # truncated sequence (5') s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('CGTACATACCCTGGTAGT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('-----CGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # truncated sequence (5' and 3') s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('CGTACATACCCTGGT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('-----CGTACATACCCTGGT---') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) def test_align_two_seqs_with_pair_hmm(self): """ align_two_seqs: fns for simple alignments with pair_hmm alignment """ # Only a few trivial cases are tested as it is not the place to # test how the aligners functions f = pair_hmm_align_unaligned_seqs # perfect alignment s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(s1,s2)) # gap added to s2 s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATCCCTGGTAGT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('ACGTACGTACAT-CCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # gap added to s1 s1 = DNA.makeSequence('ACGTACGTACATCCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp1 = DNA.makeSequence('ACGTACGTACAT-CCCTGGTAGT') exp2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # single mismatch s1 = DNA.makeSequence('ACGTACGTACATTCCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(s1,s2)) # truncated sequence (3') s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('ACGTACGTACATACCCT------') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # truncated sequence (5') s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('CGTACATACCCTGGTAGT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('-----CGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # truncated sequence (5' and 3') s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('CGTACATACCCTGGT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('-----CGTACATACCCTGGT---') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) def test_align_two_seqs_with_blast(self): """ align_two_seqs: fns for simple alignments with blast (bl2seq) """ # Only a few trivial cases are tested as it is not the place to # test how the aligners functions f = blast_align_unaligned_seqs # perfect alignment s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(s1,s2)) # gap added to s2 s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATCCCTGGTAGT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('ACGTACGTACAT-CCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # gap added to s1 s1 = DNA.makeSequence('ACGTACGTACATCCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp1 = DNA.makeSequence('ACGTACGTACAT-CCCTGGTAGT') exp2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # single mismatch s1 = DNA.makeSequence('ACGTACGTACATTCCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(s1,s2)) # truncated sequence (3') s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('ACGTACGTACATACCCT------') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # reversed order works as well (ie., extended sequence 3') self.assertEqual(align_two_seqs(s2,s1,f),(exp2,exp1)) # truncated sequence (5') s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('CGTACATACCCTGGTAGT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('-----CGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # reversed order works as well (ie., extended sequence 5') self.assertEqual(align_two_seqs(s2,s1,f),(exp2,exp1)) # truncated sequence (5' and 3') s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('CGTACATACCCTGGT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('-----CGTACATACCCTGGT---') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # reversed order works as well (ie., extended sequence 5' and 3') self.assertEqual(align_two_seqs(s2,s1,f),(exp2,exp1)) # staggered ends s1 = DNA.makeSequence('ACGTACGTACATACCCTGGT') s2 = DNA.makeSequence( 'CGTACATACCCTGGTAGTTT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGT-----') exp2 = DNA.makeSequence('-----CGTACATACCCTGGTAGTTT') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # reversed order works as well self.assertEqual(align_two_seqs(s2,s1,f),(exp2,exp1)) def test_align_two_seqs_with_clustal(self): """ align_two_seqs: fns for simple alignments with clustal """ # Only a few trivial cases are tested as it is not the place to # test how the aligners function f = clustal_align_unaligned_seqs # perfect alignment s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(s1,s2)) # gap added to s2 s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATCCCTGGTAGT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('ACGTACGTACAT-CCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # gap added to s1 s1 = DNA.makeSequence('ACGTACGTACATCCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp1 = DNA.makeSequence('ACGTACGTACAT-CCCTGGTAGT') exp2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # single mismatch s1 = DNA.makeSequence('ACGTACGTACATTCCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(s1,s2)) # truncated sequence (3') s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('ACGTACGTACATACCCT------') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # truncated sequence (5') s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('CGTACATACCCTGGTAGT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('-----CGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # truncated sequence (5' and 3') s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('CGTACATACCCTGGT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('-----CGTACATACCCTGGT---') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) def test_align_two_seqs_with_mafft(self): """ align_two_seqs: fns for simple alignments with mafft """ # Only a few trivial cases are tested as it is not the place to # test how the aligners functions f = mafft_align_unaligned_seqs # perfect alignment s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(s1,s2)) # gap added to s2 s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATCCCTGGTAGT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('ACGTACGTACAT-CCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # gap added to s1 s1 = DNA.makeSequence('ACGTACGTACATCCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp1 = DNA.makeSequence('ACGTACGTACAT-CCCTGGTAGT') exp2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # single mismatch s1 = DNA.makeSequence('ACGTACGTACATTCCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(s1,s2)) # truncated sequence (3') s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('ACGTACGTACATACCC------T') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # truncated sequence (5') s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('CGTACATACCCTGGTAGT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('-----CGTACATACCCTGGTAGT') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) # truncated sequence (5' and 3') s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('CGTACATACCCTGGT') exp1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp2 = DNA.makeSequence('-----CGTACATACCCTG---GT') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) def test_align_two_seqs_with_fake_aligner(self): """ align_two_seqs: fns for simple alignments with fake_aligner """ # Test a fake aligner function which uses the params dict def f(seqs,moltype,params={}): try: res = params['res'] except KeyError: res = 'AAAAAAAAAA' seqs = [('template',str(res)), ('candidate',str(res))] seqs = LoadSeqs(data=seqs,moltype=moltype,aligned=DenseAlignment) return seqs s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp1 = DNA.makeSequence('AAAAAAAAAA') exp2 = DNA.makeSequence('AAAAAAAAAA') self.assertEqual(align_two_seqs(s1,s2,f),(exp1,exp2)) s1 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') s2 = DNA.makeSequence('ACGTACGTACATACCCTGGTAGT') exp1 = DNA.makeSequence('BBB') exp2 = DNA.makeSequence('BBB') self.assertEqual(align_two_seqs(s1,s2,f,params={'res':'BBB'}),\ (exp1,exp2)) def test_reintroduce_template_spacing_template(self): """ reintroduce_template_spacing: template example from DeSantis2004 """ template = DNA.makeSequence('ATAC-----GTA-AC----GTA---C---G-T-AC-GG') pw_aligned_template = DNA.makeSequence('ATACGT-A-ACGTACGTAC--GG') pw_aligned_candidate= DNA.makeSequence('C-ACGTTAAACGT-CGTACCCGG') template_expected = \ DNA.makeSequence('ATAC-----GT-A-AC----GTA---C---G-T-AC--GG') actual = reintroduce_template_spacing(\ template,pw_aligned_template,pw_aligned_candidate) self.assertEqual(actual[0],template_expected) def test_reintroduce_template_spacing_candidate(self): """ reintroduce_template_spacing: candidate example from DeSantis2006 """ template = DNA.makeSequence('ATAC-----GTA-AC----GTA---C---G-T-AC-GG') pw_aligned_template = DNA.makeSequence('ATACGT-A-ACGTACGTAC--GG') pw_aligned_candidate= DNA.makeSequence('C-ACGTTAAACGT-CGTACCCGG') candidate_expected = \ DNA.makeSequence('C-AC-----GTTAAAC----GT----C---G-T-ACCCGG') actual = reintroduce_template_spacing(\ template,pw_aligned_template,pw_aligned_candidate) self.assertEqual(actual[1],candidate_expected) def test_reintroduce_template_spacing_new_gaps(self): """ reintroduce_template_spacing: new gaps example from DeSantis2006 """ template = DNA.makeSequence('ATAC-----GTA-AC----GTA---C---G-T-AC-GG') pw_aligned_template = DNA.makeSequence('ATACGT-A-ACGTACGTAC--GG') pw_aligned_candidate= DNA.makeSequence('C-ACGTTAAACGT-CGTACCCGG') new_gaps_expected = [11,36] actual = reintroduce_template_spacing(\ template,pw_aligned_template,pw_aligned_candidate) self.assertEqual(actual[2],new_gaps_expected) def test_reintroduce_template_spacing(self): """ reintroduce_template_spacing: example from DeSantis2006 """ template = DNA.makeSequence('ATAC-----GTA-AC----GTA---C---G-T-AC-GG') pw_aligned_template = DNA.makeSequence('ATACGT-A-ACGTACGTAC--GG') pw_aligned_candidate= DNA.makeSequence('C-ACGTTAAACGT-CGTACCCGG') template_expected = \ DNA.makeSequence('ATAC-----GT-A-AC----GTA---C---G-T-AC--GG') candidate_expected = \ DNA.makeSequence('C-AC-----GTTAAAC----GT----C---G-T-ACCCGG') new_gaps_expected = [11,36] actual = reintroduce_template_spacing(\ template,pw_aligned_template,pw_aligned_candidate) self.assertEqual(actual,\ (template_expected,candidate_expected,new_gaps_expected)) def test_reintroduce_template_spacing_no_change(self): """ reintroduce_template_spacing: no changes """ template = DNA.makeSequence('AT-CG') actual = reintroduce_template_spacing(\ template,template,template) self.assertEqual(actual,(template,template,[])) # different seqs but pw alignment matches template pattern template = DNA.makeSequence('ATC-G') pw_aligned_template = DNA.makeSequence ('ATC-G') pw_aligned_candidate = DNA.makeSequence('ATCCG') template_expected = DNA.makeSequence ('ATC-G') candidate_expected = DNA.makeSequence('ATCCG') actual = reintroduce_template_spacing(\ template,pw_aligned_template,pw_aligned_candidate) self.assertEqual(actual,(template_expected,candidate_expected,[])) def test_reintroduce_template_spacing_middle(self): """ reintroduce_template_spacing: change to non-terminal character """ template = DNA.makeSequence('GTA---C') pw_aligned_template = DNA.makeSequence( 'GTAC') pw_aligned_candidate = DNA.makeSequence('GT-C') template_expected = DNA.makeSequence( 'GTA---C') candidate_expected = DNA.makeSequence('GT----C') new_gaps_expected = [] actual = reintroduce_template_spacing(\ template,pw_aligned_template,pw_aligned_candidate) self.assertEqual(actual,\ (template_expected,candidate_expected,new_gaps_expected)) template = DNA.makeSequence('ATAC-----GTA-AC') pw_aligned_template = DNA.makeSequence( 'ATACGT-A-AC') pw_aligned_candidate = DNA.makeSequence('C-ACGTTAAAC') template_expected = DNA.makeSequence( 'ATAC-----GT-A-AC') candidate_expected = DNA.makeSequence('C-AC-----GTTAAAC') new_gaps_expected = [11] actual = reintroduce_template_spacing(\ template,pw_aligned_template,pw_aligned_candidate) self.assertEqual(actual,\ (template_expected,candidate_expected,new_gaps_expected)) # single gap in new spot template = DNA.makeSequence('GTA-AC') pw_aligned_template = DNA.makeSequence( 'GT-A-AC') pw_aligned_candidate = DNA.makeSequence('GTTAAAC') template_expected = DNA.makeSequence( 'GT-A-AC') candidate_expected = DNA.makeSequence('GTTAAAC') new_gaps_expected = [2] actual = reintroduce_template_spacing(\ template,pw_aligned_template,pw_aligned_candidate) self.assertEqual(actual,\ (template_expected,candidate_expected,new_gaps_expected)) # existing gap extended template = DNA.makeSequence('AC-GG') pw_aligned_template = DNA.makeSequence( 'AC--GG') pw_aligned_candidate = DNA.makeSequence('ACCCGG') template_expected = DNA.makeSequence( 'AC--GG') candidate_expected = DNA.makeSequence('ACCCGG') new_gaps_expected = [2] actual = reintroduce_template_spacing(\ template,pw_aligned_template,pw_aligned_candidate) self.assertEqual(actual,\ (template_expected,candidate_expected,new_gaps_expected)) def test_reintroduce_template_spacing_leading_trailing_gaps_ignored(self): """ reintroduce_template_spacing: lead/trailing template gaps ignored """ # leading gaps template = DNA.makeSequence('----AC-GG') pw_aligned_template = DNA.makeSequence( 'AC--GG') pw_aligned_candidate = DNA.makeSequence('ACCCGG') template_expected = DNA.makeSequence( 'AC--GG') candidate_expected = DNA.makeSequence('ACCCGG') new_gaps_expected = [2] actual = reintroduce_template_spacing(\ template,pw_aligned_template,pw_aligned_candidate) self.assertEqual(actual,\ (template_expected,candidate_expected,new_gaps_expected)) # trailing gaps template = DNA.makeSequence('AC-GG---') pw_aligned_template = DNA.makeSequence( 'AC--GG') pw_aligned_candidate = DNA.makeSequence('ACCCGG') template_expected = DNA.makeSequence( 'AC--GG') candidate_expected = DNA.makeSequence('ACCCGG') new_gaps_expected = [2] actual = reintroduce_template_spacing(\ template,pw_aligned_template,pw_aligned_candidate) self.assertEqual(actual,\ (template_expected,candidate_expected,new_gaps_expected)) # leading/trailing gaps template = DNA.makeSequence('-AC-GG---') pw_aligned_template = DNA.makeSequence( 'AC--GG') pw_aligned_candidate = DNA.makeSequence('ACCCGG') template_expected = DNA.makeSequence( 'AC--GG') candidate_expected = DNA.makeSequence('ACCCGG') new_gaps_expected = [2] actual = reintroduce_template_spacing(\ template,pw_aligned_template,pw_aligned_candidate) self.assertEqual(actual,\ (template_expected,candidate_expected,new_gaps_expected)) def test_adjust_alignment_paper_example(self): """ adjust_alignment: example from DeSantis2006 """ template = \ DNA.makeSequence('ATAC-----GT-A-AC----GTA---C---G-T-AC--GG') candidate = \ DNA.makeSequence('C-AC-----GTTAAAC----GT----C---G-T-ACCCGG') new_gaps = [11,36] # IS THERE A TYPO IN THEIR EXAMPLE? THEY CHANGE GT-A-AC TO # GT-AAC, BUT THAT DOESN'T REALLY MAKE SENSE GIVEN THAT THE # TEMPLATE ALIGNMENT IS GTA-AC... template_expected = \ DNA.makeSequence('ATAC-----GTA-AC----GTA---C---G-T-AC-GG') candidate_expected = \ DNA.makeSequence('C-AC----GTTAAAC----GT----C---G-TACCCGG') actual = adjust_alignment(template,candidate,new_gaps) self.assertEqual(actual,(template_expected,candidate_expected)) def test_adjust_alignment(self): """ adjust_alignmnet: simple adjustments handled as expected """ # remove a 3' gap t = DNA.makeSequence('AA-GGC---ATTAA') c = DNA.makeSequence('AATCCTT--AAAAA') new_gaps = [2] t_expected = DNA.makeSequence('AAGGC---ATTAA') c_expected = DNA.makeSequence('AATCCTT-AAAAA') self.assertEqual(adjust_alignment(t,c,new_gaps),\ (t_expected,c_expected)) # remove a 5' gap t = DNA.makeSequence('AA-GGC----TTAA') c = DNA.makeSequence('AATCCTT--AAAAA') new_gaps = [9] t_expected = DNA.makeSequence('AA-GGC---TTAA') c_expected = DNA.makeSequence('AATCCTT-AAAAA') self.assertEqual(adjust_alignment(t,c,new_gaps),\ (t_expected,c_expected)) # multiple gaps to remove t = DNA.makeSequence('AA-GGC----TTAA') c = DNA.makeSequence('AATCCTT--AAAAA') new_gaps = [2,9] t_expected = DNA.makeSequence('AAGGC---TTAA') c_expected = DNA.makeSequence('AATCCTTAAAAA') self.assertEqual(adjust_alignment(t,c,new_gaps),\ (t_expected,c_expected)) def test_adjust_alignment_multiple_adjancent_new_gaps(self): """ adjust_alignmnet: multiple adjacent new gaps handled as expected """ t = DNA.makeSequence('AA--GC---ATTAA') c = DNA.makeSequence('AATCCTT--AAAAA') new_gaps = [2,3] t_expected = DNA.makeSequence('AAGC---ATTAA') c_expected = DNA.makeSequence('AATCCTTAAAAA') actual = adjust_alignment(t,c,new_gaps) # print '' # print actual[0] # print t_expected self.assertEqual(actual,(t_expected,c_expected)) t = DNA.makeSequence('AATTGCG---CAT') c = DNA.makeSequence('AA---CTTTTAAA') new_gaps = [7,8,9] t_expected = DNA.makeSequence('AATTGCGCAT') c_expected = DNA.makeSequence('AACTTTTAAA') actual = adjust_alignment(t,c,new_gaps) # print '' # print actual[0] # print t_expected self.assertEqual(actual,(t_expected,c_expected)) t = DNA.makeSequence('AATTGCG---CAT') c = DNA.makeSequence('AA-CTTTTTA-A-') new_gaps = [7,8,9] t_expected = DNA.makeSequence('AATTGCGCAT') c_expected = DNA.makeSequence('AACTTTTTAA') actual = adjust_alignment(t,c,new_gaps) # print '' # print actual[0] # print t_expected self.assertEqual(actual,(t_expected,c_expected)) def test_nearest_gap(self): """nearest_gap: functions with single gap in seq """ seq = 'AAA-AAAA' for pos in range(len(seq)): self.assertEqual(nearest_gap(seq,pos),3) seq = '-ACGTACGT' for pos in range(len(seq)): self.assertEqual(nearest_gap(seq,pos),0) seq = 'ACGTACGT-' for pos in range(len(seq)): self.assertEqual(nearest_gap(seq,pos),8) def test_nearest_gap_mutliple_gaps(self): """nearest_gap: handles multiple gaps in same sequence """ seq = 'ACG-TT-AACC--TAAT' self.assertEqual(nearest_gap(seq,0),3) self.assertEqual(nearest_gap(seq,1),3) self.assertEqual(nearest_gap(seq,2),3) self.assertEqual(nearest_gap(seq,3),3) self.assertEqual(nearest_gap(seq,4),3) self.assertEqual(nearest_gap(seq,5),6) self.assertEqual(nearest_gap(seq,6),6) self.assertEqual(nearest_gap(seq,7),6) self.assertEqual(nearest_gap(seq,8),6) self.assertEqual(nearest_gap(seq,9),11) self.assertEqual(nearest_gap(seq,10),11) self.assertEqual(nearest_gap(seq,11),11) self.assertEqual(nearest_gap(seq,12),12) self.assertEqual(nearest_gap(seq,13),12) self.assertEqual(nearest_gap(seq,14),12) self.assertEqual(nearest_gap(seq,15),12) self.assertEqual(nearest_gap(seq,16),12) def test_nearest_gap_ambiguous(self): """nearest_gap: handles ambiguous cases by chosing the 5' position Not certain that this is how this should be handled... Maybe revisit by seeing which way gives the better alignment? """ seq = '-A-A-A-' self.assertEqual(nearest_gap(seq,1),0) self.assertEqual(nearest_gap(seq,3),2) self.assertEqual(nearest_gap(seq,5),4) def test_nearest_gap_handles_error(self): """nearest_gap: errors are handled correctly """ seq = 'AA-AAA' self.assertRaises(IndexError,nearest_gap,seq,22) self.assertRaises(IndexError,nearest_gap,seq,-1) seq = 'AAA' self.assertRaises(UnalignableSequenceError,nearest_gap,seq,1) def test_introduce_terminal_gaps_simple(self): """introduce_terminal_gaps: functions as expected """ # no terminal gaps template = DNA.makeSequence('AAA',Name='t') aligned_candidate = DNA.makeSequence('AAA',Name='ac') aligned_template = DNA.makeSequence('AAA',Name='at') actual = introduce_terminal_gaps(\ template,aligned_template,aligned_candidate) expected = DNA.makeSequence('AAA',Name='ac') self.assertEqual(actual,expected) # 5' terminal gaps only template = DNA.makeSequence('-AAA',Name='t') aligned_candidate = DNA.makeSequence('AAA',Name='ac') aligned_template = DNA.makeSequence('AAA',Name='at') actual = introduce_terminal_gaps(\ template,aligned_template,aligned_candidate) expected = DNA.makeSequence('-AAA',Name='ac') self.assertEqual(actual,expected) template = DNA.makeSequence('-----AAA',Name='t') aligned_candidate = DNA.makeSequence('AAA',Name='ac') aligned_template = DNA.makeSequence('AAA',Name='at') actual = introduce_terminal_gaps(\ template,aligned_template,aligned_candidate) expected = DNA.makeSequence('-----AAA',Name='ac') self.assertEqual(actual,expected) # 3' terminal gaps only template = DNA.makeSequence('ACG--',Name='t') aligned_candidate = DNA.makeSequence('ACG',Name='ac') aligned_template = DNA.makeSequence('AAA',Name='at') actual = introduce_terminal_gaps(\ template,aligned_template,aligned_candidate) expected = DNA.makeSequence('ACG--',Name='ac') self.assertEqual(actual,expected) template = DNA.makeSequence('ACCTG----',Name='t') aligned_candidate = DNA.makeSequence('ACGGG',Name='ac') aligned_template = DNA.makeSequence('ACCTG',Name='at') actual = introduce_terminal_gaps(\ template,aligned_template,aligned_candidate) expected = DNA.makeSequence('ACGGG----',Name='ac') self.assertEqual(actual,expected) # 5' and 3' terminal gaps template = DNA.makeSequence('---AC--CTG----',Name='t') aligned_candidate = DNA.makeSequence('ACTTGGG',Name='ac') aligned_template = DNA.makeSequence( 'AC--CTG',Name='at') actual = introduce_terminal_gaps(\ template,aligned_template,aligned_candidate) expected = DNA.makeSequence('---ACTTGGG----',Name='ac') self.assertEqual(actual,expected) def test_introduce_terminal_gaps_existing_terminal_template_gaps(self): """introduce_terminal_gaps: aligned template already has terminal gaps """ # one 5' gap in aligned_template template = DNA.makeSequence('---AAA',Name='t') aligned_candidate = DNA.makeSequence('AAAA',Name='ac') aligned_template = DNA.makeSequence('-AAA',Name='at') actual = introduce_terminal_gaps(\ template,aligned_template,aligned_candidate) expected = DNA.makeSequence('--AAAA',Name='ac') self.assertEqual(actual,expected) # multiple 5' gaps in aligned_template template = DNA.makeSequence('---AAA',Name='t') aligned_candidate = DNA.makeSequence('AAAAAA',Name='ac') aligned_template = DNA.makeSequence( '---AAA',Name='at') actual = introduce_terminal_gaps(\ template,aligned_template,aligned_candidate) expected = DNA.makeSequence('AAAAAA',Name='ac') self.assertEqual(actual,expected) # one 3' gap in aligned_template template = DNA.makeSequence('AAA---',Name='t') aligned_candidate = DNA.makeSequence('AAAA',Name='ac') aligned_template = DNA.makeSequence( 'AAA-',Name='at') actual = introduce_terminal_gaps(\ template,aligned_template,aligned_candidate) expected = DNA.makeSequence('AAAA--',Name='ac') self.assertEqual(actual,expected) # multiple 3' gaps in aligned_template template = DNA.makeSequence('AAA---',Name='t') aligned_candidate = DNA.makeSequence('AAAAAA',Name='ac') aligned_template = DNA.makeSequence( 'AAA---',Name='at') actual = introduce_terminal_gaps(\ template,aligned_template,aligned_candidate) expected = DNA.makeSequence('AAAAAA',Name='ac') self.assertEqual(actual,expected) # 5 prime, 3 prime gaps in aligned_template template = DNA.makeSequence('--CAA---',Name='t') aligned_candidate = DNA.makeSequence('GCAAT',Name='ac') aligned_template = DNA.makeSequence( '-CAA-',Name='at') actual = introduce_terminal_gaps(\ template,aligned_template,aligned_candidate) expected = DNA.makeSequence('-GCAAT--',Name='ac') self.assertEqual(actual,expected) # internal, 5', 3' gaps template = DNA.makeSequence('--CATA---',Name='t') aligned_candidate = DNA.makeSequence('GCA-AT',Name='ac') aligned_template = DNA.makeSequence( '-CATA-',Name='at') actual = introduce_terminal_gaps(\ template,aligned_template,aligned_candidate) expected = DNA.makeSequence('-GCA-AT--',Name='ac') self.assertEqual(actual,expected) def test_remove_template_terminal_gaps(self): """ removing terminal gaps functions as expected """ # no template terminal gaps candidate = DNA.makeSequence('--CGTTGG-',Name='c') template = DNA.makeSequence('ACCGT-GGA',Name='t') actual = remove_template_terminal_gaps(candidate,template) expected = (DNA.makeSequence('--CGTTGG-',Name='c 1..6'),template) self.assertEqual(actual[0].Name,expected[0].Name) self.assertEqual(actual[1].Name,expected[1].Name) self.assertEqual(actual,expected) candidate = DNA.makeSequence('',Name='c') template = DNA.makeSequence('',Name='t') actual = remove_template_terminal_gaps(candidate,template) expected = (candidate,template) self.assertEqual(actual[0].Name,expected[0].Name) self.assertEqual(actual[1].Name,expected[1].Name) self.assertEqual(actual,expected) # 5' template terminal gaps candidate = DNA.makeSequence('ACCGTTGGA',Name='c') template = DNA.makeSequence('--CGT-GGA',Name='t') actual = remove_template_terminal_gaps(candidate,template) expected = (DNA.makeSequence('CGTTGGA',Name='c 3..9'), DNA.makeSequence('CGT-GGA',Name='t')) self.assertEqual(actual[0].Name,expected[0].Name) self.assertEqual(actual[1].Name,expected[1].Name) self.assertEqual(actual,expected) candidate = DNA.makeSequence('ACCGTTGGA',Name='c') template = DNA.makeSequence('-CCGT-GGA',Name='t') actual = remove_template_terminal_gaps(candidate,template) expected = (DNA.makeSequence('CCGTTGGA',Name='c 2..9'), DNA.makeSequence('CCGT-GGA',Name='t')) self.assertEqual(actual[0].Name,expected[0].Name) self.assertEqual(actual[1].Name,expected[1].Name) self.assertEqual(actual,expected) # 3' template terminal gaps candidate = DNA.makeSequence('ACCGTTGGA',Name='c') template = DNA.makeSequence('ACCGT-GG-',Name='t') actual = remove_template_terminal_gaps(candidate,template) expected = (DNA.makeSequence('ACCGTTGG',Name='c 1..8'), DNA.makeSequence('ACCGT-GG',Name='t')) self.assertEqual(actual[0].Name,expected[0].Name) self.assertEqual(actual[1].Name,expected[1].Name) self.assertEqual(actual,expected) candidate = DNA.makeSequence('ACCGTTGGA',Name='c') template = DNA.makeSequence('ACCGT-G--',Name='t') actual = remove_template_terminal_gaps(candidate,template) expected = (DNA.makeSequence('ACCGTTG',Name='c 1..7'), DNA.makeSequence('ACCGT-G',Name='t')) self.assertEqual(actual[0].Name,expected[0].Name) self.assertEqual(actual[1].Name,expected[1].Name) self.assertEqual(actual,expected) # 5' and 3' template terminal gaps candidate = DNA.makeSequence('ACCGTTGGA',Name='c') template = DNA.makeSequence('--CGT-GG-',Name='t') actual = remove_template_terminal_gaps(candidate,template) expected = (DNA.makeSequence('CGTTGG',Name='c 3..8'), DNA.makeSequence('CGT-GG',Name='t')) self.assertEqual(actual[0].Name,expected[0].Name) self.assertEqual(actual[1].Name,expected[1].Name) self.assertEqual(actual,expected) # name constructed correctly when contains RC candidate = DNA.makeSequence('ACCGTTGGA',Name='c RC') template = DNA.makeSequence('--CGT-GG-',Name='t') actual = remove_template_terminal_gaps(candidate,template) expected = (DNA.makeSequence('CGTTGG',Name='c RC:3..8'), DNA.makeSequence('CGT-GG',Name='t')) self.assertEqual(actual[0].Name,expected[0].Name) self.assertEqual(actual[1].Name,expected[1].Name) self.assertEqual(actual,expected) # ValueError on unaligned seqs candidate = DNA.makeSequence('ACCGTTGGA',Name='c') template = DNA.makeSequence('-CGT-GG-',Name='ct') self.assertRaises(ValueError,\ remove_template_terminal_gaps,candidate,template) def test_pynast_seq_3037(self): """ uclust as pairwise aligner fixes problematic bl2seq alignment Strange alignment issues were found with this sequence in PyNAST 1.0. This tests that a good alignment is achieved with this seqeunce in later versions. """ template_alignment = LoadSeqs(data=template_128453.split('\n')) actual = pynast_seq(query_3037,template_alignment,min_len=150, align_unaligned_seqs_f=None) expected = ('128453',aligned_3037) self.assertEqual(actual,expected) query_3037 = DNA.makeSequence("CTGGGCCGTGTCTCAGTCCCAGTGTGGCTGATCATCCTCTCAGACCAGCTAAGGATCGTCGCCTTGGTGCGCCTTTACCACACCAACTAGCTAAAGGCGATAAATCTTTGATCTCGCGATATCATCCGGTATTAGCAGCAATTTCTCGCTGTTATTCCGAACCTGAGGGCAGATTCCCACGCGTTACGCACCCGTGCGCCACTAAGGCCG",Name=">v15D30.1.08_100583") aligned_3037 = DNA.makeSequence("----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------C-G----------------------------------------------------------------------------------GC-------------------------------CT--T--AG-T-GG-C-GC-A--C-------------GGG-TGCGT-A--AC-GC-G-T-G-GG---A-A--T-CT-G--C-C-CTC--AG-G------------------------------------------------------------------T-TC----GGA-AT-AA-CAG-------------------------C-G-A-----------------------GAA-A---TTG-CTG-CTAA-TA---CC-G--G-AT-G----------A--------------------T-------------------------------------AT-C-----------------------------------------------------------------------------------------------------------------------G-CG-A--------------------------------------------------------------------------------------------------------------------------------------G-A-T---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CAAA--G-A----------------------------------------------------------------------------------------------------------------------------------------TTT-A----------------------------------------------------------------------------------------------------------------------------------T---C-G--------------C----C-T--------------------------------------------------TT--A--G-CT-A----G---TTGG-T-G-TG-G-T----AAA-GG-C-G-C-ACCA--A-GG-C-G--A-CG-A------------TCC-T-T------AG-CT-G-G-TCT-G-AG----A--GG-AT--G-AT-C-AG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAG------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------",Name="v15D30.1.08_100583 1..210") template_128453 = """>128453 ------------------------------------------------------------------------------------------------------AACTTGAGAGTTT-GA--T-TC-T-G-GCTC-AG-AA-CGAA-C-GC--TGG-C--G-GC-A-TG--C----T-T--AACACA-T-GC-A-AGT-CGA-A-CGA---------A-G------------------------------------------GC----------------------------------------------------TTC-G----------------------------------------------------------------------------------GC-------------------------------CT--T--AG-T-GG-C-GC-A--C-------------GGG-TGCGT-A--AC-GC-G-T-G-GG---A-A--T-CT-G--C-C-TTC--AG-G------------------------------------------------------------------T-AC----GGA-AT-AA-CTA-------------------------G-G-G-----------------------GAA-A---CTC-GAG-CTAA-TA---CC-G--T-AT-G----------A--------------------T-------------------------------------AT-C-----------------------------------------------------------------------------------------------------------------------G-AG-A--------------------------------------------------------------------------------------------------------------------------------------G-A-T---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CAAA--G-A----------------------------------------------------------------------------------------------------------------------------------------TTT-A----------------------------------------------------------------------------------------------------------------------------------T---C-G--------------C----C-T---G-AA-G---AT---G-A-----G-CCC-GCG--T-TGG--A------TT--A--G-CT-A----G---TTGG-T-A-GG-G-T----AAA-GG-C-T-T-ACCA--A-GG-C-G--A-CG-A------------TCC-A-T------AG-CT-G-G-TCT-G-AG----A--GG-AT--G-AT-C-AG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TG---GG-G-A-ATA-TTGGA-C-AA-T-GG--GG-GA-A----A-C-CC-T-GA-TC-CA-GCAA-TGCC-G-CG-T---G-A-G--T--GA-A-G--A--A-G-G-CC-----TT-AG---------G-G-T-T-G-T--A---AA-G-CTC--------TT-TT-A-C--C-CGG----GA-T--G---A-----------------------T--AA------------------------------T-GA-CA-GT-A-C-CG-G-GA-G---------AA-----------TAAGC-TCC-GG-C-TAA---C--T-CCGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GG-AG-GGA-GCT-A-G-CG-TTGT-T-CGG-AA-TT-A--C-T--GGGC-GTA----AA-GCGT-AC--G-TA-G-G-C-G------------G--T-TT-A-A-T-AA----G-T-C-A---G-GGG-TG-A-AA-GC--CC-AGA-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CT-C-T-GG-AA-C----T-G-C-C-T-T--------T--GA-G-A-C-T-G-TTA--G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-A-G-A-A-C-A-----T-AG--AA-G-A------------G-GT-A-AG-T----GG--AATT-CCG-A-GT--GT-A-GAG-GTGAAA-TT-CGT-AGAT-A-TT-C-GGA--AG-A-AC-A-CC-AG--T--G--GC-GAA-G--G-C---G----A--C-T-TACTG------G-TC-TA--------------------------------------------------------------TA-G-T-T--GA--CG-----CT-GA-GG--T-A-CGA--AA-G-C--------------G-TGGG-TAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-AT--AA-CT---------A-GC--T--G-T-CC-G-GG-T--A--------------------------------------------------------------------------------------CAT-GG--------------------------------------------------------------------------------------------------------------------------------------------------T-A-T-CT--G-G-G-T-GG-C------GG--A----GC-TAA--CG-C-A-T--T--AA-GT--T----A-TCC-GCC-T-G-GG-GAG-TA---CGG-----T-C--G-C-A-A-GAT-T--AAA-ACTC-AAA---------GAAA-TTG-ACGGG-G-G-CCTG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-C-AG-A-A-CC-TT-A-CC-AGCGT-TT-G-AC-A-T-C-------------CTGA-T-C-------------G-CG-G-AAA--GT--G-GA-G-A-C--A-C-A-TT-C-T-T--T-C-----AG-------------------------------------T--TC-GG-----------------------------------------CT----G--------GA-TCA-G-A--GA---------------------------------------------------C-A-G-G-T-GCTG-CA-TGG-CT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-CA--CC--T-CTAG--T-T-G-C-C---AT-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TAAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---C-A--CT---------------T-T-A-G-A-GG-A--AC-T-G-CCG--G-T------------------------------------G-A---TAA----------------------------------G-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAGT-C---CTC-A-T-G-G-C-C-CTT----AC-G--CG-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-CGGT-G-A--C-AGA-GG-GC--------------------------------------------------------------------------------------------------C-G-C-A-A--G-CCTG-C--A---------------------------------------A-AG-G-T-----------T--A-G-CT---A----------A--TCT-C--------A-AAAAG-CC-G-T-C-T-CAG-TTC--------GGA-T-TGTTC-TC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-AGAGC-A-T-G-AA-G-GC-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AAT-ACGT-T-CCCAGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-TG-GG-A--G---TTG-G-AT-TC-ACC--C-GAA------G--G-CGC-TG-C-G-C-T-AA-C-C-C-----------------------------------------------------------G-CA-A---------------------------------------------------------------------------------------------------G--GG-A--GG-C--A---GG-CGA--CC--ACG-G----T-GGG-TT-TAG------------------------CG--ACT-GGGG-TG-AAG-TCGTAACAA-GGTAG-CCGT-AGGGGAA-CCTG-CGGC-TGGATCACCTCCTTTCTAAGGA---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------""" db_aln2 = LoadSeqs(data=dict([ ('1','ACGT--ACGTAC-ATA-C-----CC-T-G-GTA-G-T---'), ('2','AGGTTTACGTAG-ATA-C-----CC-T-G-GTA-G-T---'),\ ('3','AGGTACT-CCAC-ATA-C-----CC-T-G-GTA-G-T---'), ('4','TCGTTCGT-----ATA-C-----CC-T-G-GTA-G-T---'), ('5','ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---')]),\ moltype=DNA,aligned=DenseAlignment) template_14990_trimmed = """>14990_5_and_3_prime_lost_four_bases_each --------------------------------------------------------------------------------------------------------------------------------------AG-GA-CGAA-C-GC--TGG-C--G-GC-G-TG--C----C-T--AATACA-T-GC-A-AGT-CGA-G-CGG---------A-A---ATTTTA--------------------------TTGG---TG----------------------------------------------------CTT-G----------------------------------------------------------------------------------CAC-CTT-------------------TAAAAT-TT--T--AG-C-GGCG-G--A--C-------------GGG-TGAGT-A--AC-AC-G-T-G-GG---TAA--C-CTAC--C-T--TA--TA-G------------------------------------------------------------------A-TT----GGG-AT-AA-CTC-------------------------C-G-G-----------------------GAA-A---CCG-GGG-CTAATAC---CG-A----AT-A---------------------------------A-TA-C-T--T--T----------------TTA---AC-------------------------------------------------------------------------------------------------------------------------A-CA-T--------------------------------------------------------------------------------------------------------------------------------------G-T-T--TGA---------------A--A---G-T-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-A-C-GG-----T-----T-----------------------------------------------------------------------------------------------------------------------TCG--------------------------------------------------------------------------------------------------------------------------G--C--TG--T---C-A--------------C----T-A---T-AA-G---AT---G-G-----A-CCC-GCG--G-CGC--A------TT--A--G-CT-A----G---TTGG-T-G-AG-G-T----AAC-GG-C-T-C-ACCA--A-GG-C-A--A-CG-A------------TGC-G-T------AG-CC-G-A-CCT-G-AG----A--GG-GT--G-AT-C-GG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TA---GG-G-A-ATC-TTCCA-C-AA-T-GG--AC-GA-A----A-G-TC-T-GA-TG-GA-GCAA-CGCC-G-CG-T---G-A-G--T--GA-A-G--A--A-G-G-AT-----TT-CG---------G-T-T-C-G-T--A---AA-A-CTC--------TG-TT-G-C--A-AGG----GA-A--G---AACAAGT---AGCG-TA----G--T--AA-C---T----G-----G--C-GCT-ACC-TT-GA-CG-GT-A-C-CT-T-GT-T---------AG-----------AAAGC-CAC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GTG-GCA-A-G-CG-TTGT-C-CGG-AA-TT-A--T-T--GGGC-GTA----AA-GCGC-GC--G-CA-G-G-T-G------------G--T-TC-C-T-T-AA----G-T-C-T---G-ATG-TG-A-AA-GC--CC-CCG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-G-G-GG-AG------G-GTC-A-T-T--------G--GA-A-A-C-T-G-GGG--A-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-G-----C-AG--AA-G-A------------G-GA-T-AG-T----GG--AATT-CCA-A-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TT-T-GGA--GG-A-AC-A-CC-AG--T--G--GC-GAA-G--G-C---G----A--C-T-GTCTG------G-TC-TG--------------------------------------------------------------TA-A-C-T--GA--CA-----CT-GA-GG--C-G-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-AG--TG-CT---------A-AG--T--G-T-TG-G-GG-G--G--T------------------------------------------------------------------------------------TT-CC----------------------------------------------------------------------------------------------------------------------------------------------G---C-C-C-CT--C-A-G-T-GC-T------GC--A----GC-TAA--CG-C-A-T--T--AA-GC--A----C-TCC-GCC-T-G-GG-GAG-TA---CGG-----T-C--G-C-A-A-GAC-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AGGTC-TT-G-AC-A-TCC--------------CGG-T-G-------------A-CC-A-C-T--AT--G-GA-G-A-C--A-T-A--G-T-T-T--C-C-----CC-------------------------------------T--TC-G------------------------------------------GG----G----G--CAA-CGG---T--GA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-TT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TA--TT--C-TTAG--T-T-G-C-C---AT-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TCAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---C-A--CT---------------C-T-A-A-G-GA-G--AC-T-G-CCG--G-T------------------------------------G-A---TAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAAT-C---ATC-A-T-G-C-C-C-CTT----AT-G--AC-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-ACGG-T-A--C-AAA-CG-GT--------------------------------------------------------------------------------------------------T-G-C-C-A--A-CCCG-C--G---------------------------------------A-GG-G-G-----------G--A-G-CT---A----------A--TCC-G------A-T-AAAAC-CG-T-T-C-T-CAG-TTC--------GGA-T-TGTAG-GC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-CCTAC-A-T-G-AA-G-CC-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AAT-ACGT-T-CCCGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-CG-AG-A--G---TTT-G-TA-AC-ACC--C-GAA------G--T-CGG-TG-A-G-G-T-AA-C-C-T-----------------------------------------------------------T-TA-----------------------------------------------------------------------------------------------------T--GG-A-C-C-C--A---CC-CGC--CG--AAG-G----T-GGG-AT-AAA------------------------TA--ATT-GGGG-TG-AAT-TCTTAACAA-GGTAC-CCGT-ATCGGAA-GGTG-CGGC-TGG------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ """ input_seq_14990 = """>14990 GCTCAGGACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGGAAATTTTATTGGTGCTTGCACCTTTAAAATTTTAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTACCTTATAGATTGGGATAACTCCGGGAAACCGGGGCTAATACCGAATAATACTTTTTAACACATGTTTGAAAGTTGAAAGACGGTTTCGGCTGTCACTATAAGATGGACCCGCGGCGCATTAGCTAGTTGGTGAGGTAACGGCTCACCAAGGCAACGATGCGTAGCCGACCTGAGAGGGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGTGAAGAAGGATTTCGGTTCGTAAAACTCTGTTGCAAGGGAAGAACAAGTAGCGTAGTAACTGGCGCTACCTTGACGGTACCTTGTTAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGTGGTTCCTTAAGTCTGATGTGAAAGCCCCCGGCTCAACCGGGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGATAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTGTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTGGGGGGTTTCCGCCCCTCAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCGGTGACCACTATGGAGACATAGTTTCCCCTTCGGGGGCAACGGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTATTCTTAGTTGCCATCATTCAGTTGGGCACTCTAAGGAGACTGCCGGTGATAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAACGGTTGCCAACCCGCGAGGGGGAGCTAATCCGATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTATGGACCCACCCGCCGAAGGTGGGATAAATAATTGGGGTGAATTCTTAACAAGGTACCCGTATCGGAAGGTGCGGCTGGATCA""" input_seq_10116 = """>10116 CTGGTCCGTGTCTCAGTACCAGTGTGGGGGACCTTCCTCTCAGAACCCCTACGCATCGTCGCCTTGGTGGGCCGTTACCCCACCAACTATCTAATCAGACGCGAGCCCATCTCTGAGCGAATTTCTTTGATATTCAAATCATGCGATTTAAATATGTTATGAGGTATTACCATCCGTTTCCAGAAGCTATCCCTCTCTCAGAGGCAGGTTGCTCACGTGTTACTCACCCGTTCGCCACTCAACTCTTCATCGGTGAGTGCAAGCACTCGGTGATGAAGAAGTTTCGTTCGACTTGCATGTATTAGGCACGCCGCCAGCGTTCATCCTGAGCCAGGATCAAACTCTG""" expected_fail1 = [('FAKE1 here is some desc.73602 tag1;tag2, tag3:tag4',\ 'AGGCGGCTACCTGGACCAACACTGACACTGAGGCACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCCTAAACGATGCGAACTGGATGTTGGGTGCAATTTGGCACGCAGTATCGAAGCTAACGCGTTAAGTTCGCCGCCTGGGGAGTACGGTCGCAAGACTTAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGTATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTGGTCTTGACATCCACGGAACTTTCCATAGATGGATTGGTGCCTTCGGGAACCGTGAGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTCCTTAGTTGCCAGCACGTAATGGTGGGAACTCTAAGGAGACCGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAGTCATCATGGCCCTTAGGGGACCAGGGCTACACACGTACTACAATGGTAGGGACAGAGGGCTGCAAACCCGCGAGGGCAAGCCAATCCCAGAAACCCTATCTCAGTCCGGATTGGAGTTTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCAGATCAGCATTGCTGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTTTGTTGCACCAGAAGCAGGTAGCTTAACCTTCGGGAGGGCGCTCACGGTGTGGCCGATGACTGGGGTGAAGTCGTAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCTCCTTTTGAGCATGACGTCATCGTCCTGTCGGGCGTCCTCACAAATTACCTGCATTCAGAGATGCGTATCGGCACAGGCCGGTATGCGAAAGTCCCATCATGGGGCCTTAGCTCAGCTGGGAGAGCACCTGCTTTGCAAGCAGGGGGTCGTCGGTTCGATCCCGACAGGCTCCACCATTTGAGTGAAACGACTTTGGGTCTGTAGCTCAGGTGGTTAGAGCGCACCCCTGATAAGGGTGAGGTCGGTGGTTCGAGTCCTCCCAGACCCACCACTCTGAATGTAGTGCACACTTAAGAATTTATATGGCTCAGCGTTGAGGCTGAGACATGTTCTTTTATAACTTGTGACGTAGCGAGCGTTTGAGATATCTATCTAAACGTGTCGTTGAGGCTAAGGCGGGGACTTCGAGTCCCTAAATAATTGAGTCGTATGTTCGCGTTGGTGGCTTTGTACCCCACACAACACGGCGTATGGCCCCGAGGCAACTTGGGGTTATATGGTCAAGCGAATAAGCGCACACGGTGGATGCCTAGGCGGTCAGAGGCGATGAAGGACGTGGTAGCCTGCGAAAAGTGTCGGGGAGCTGGCAACAAGCTTTGATCCGGCAATATCCGAATGGGGAAACCCGG')] input_seqs2 = """>AKIW1129_fasta.screen.Contig1 description field GAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGAATGACAGAGGAGCTTGCTCCTCTCGATTTAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTGCCTTATAGCTTGGGATAACTCCGGGAAACCGGGGCTAATACCGAATAATACTTTTGGACACATGTTCGAAAGTTGAAAGATGGTTCTGCTATCACTATAAGATGGACCCGCGCTGCATTAGCTAGTTGGTGAGGTAACGGCTCACCAAGGCCACGATGCATAGCCGACCTGAGAGGGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGTGAAGAAGGATTTCGGTTCGTAAAACTCTGTTGTAAGGGAAGAACAAGTACAGTAGTAACTGGCTGTACCTTGACGGTACCTTATTAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGTGGTCCTTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGGACTTGAGTGCAGAAGAGGATAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTGTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTGGGGGGTTTCCGCCCCTCAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCATTGACCACTGTAGAGATACAGTTTTCCCTTCGGGGACAACGGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTTCTTAGTTGCCATCATTTAGTTGGGCACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAACGGTTGCCAACCCGCGAGGGGGAGCTAATCCGATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTTGGAGCCAGCCGCCGAAGGTGGGATAGATGATTGGGGTGAAGTCGTAACAAGGT""" input_seqs_gaps = """>FAKE1 here is some desc.73602 tag1;tag2, tag3:tag4 AGGCGGCTACCTGGACCAACACTGACACTGAGGCACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCCTAAACGATGCGAACTGGATGTTGGGTGCAATTTGGCACGCAGTATCGAAGCTAACGCGTTAAGTTCGCCGCCTGGGGA GTACGGTCGCAAGACTTAAA-----CTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGTATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTGGTCTTGACATCCACGGAACTTTCCATAGATGGATTGGTGCCTTCGGGAACCGTGAGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCC TTGTCCTTAGTTGCCAGCACGTAAT---------GGTGGGAACTCTAAGGAGACCGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAGTCATCATGGCCCTTAGGGGACCAGGGCTACACACGTACTACAATGGTA-GGGACAGAGGGCTGCAAACCCGCGAGGGCAAGCCAATCCCAGAAACCCTATCTCAGTCCGGATTGGAGTTTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCAGATCAGCATTGCTGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTTTGTTGCACCAGAA GCAGGTAGCTTAACCTTCGGGAGGGCGCTCACGGTGTGGCCGATGACTGGGGTGAAGTCGTAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCTCCTTTTGAGCATGACGTCATCGTCCTGTCGGGCGTCCTCACAAATTACCTGCATTCAGAGATGCGTATCGGCACAGGCCGGTATGCGAAAGTCCCATCATGGGGCCTTAGCTCAGCTGGGAGAGCACCTGCTTTGCAAGCAGGGGGTCGTCGGTTCGATCCCGACAGGCTCCACCATTTGAGTGAAACGACTTTGGGTCTGTAGCTCAGGTGGTTAGAGCGCACCCCTGATAAGGGTGAGGTCGGTGGTTCGAGTCCTCCC-----------------AGACCCACCACTCTGAATGTAGTGCACACTTAAGAATTTATATGGCTCAGCGTTGAGGCTGAGACATGTTCTTTTATAACTTGTGACGTAGCGAGCGTTTGAGATATCTATCTAAACGTGTCGTTGAGGCTAAGGCGGGGACTTCGAGTCCCTAAATAATTGAGTCGTATGTTCGCGTTGGTGGCTTTGTACCCCACACAACACGGCGTATGGCCCCG--AGGCAACTTGGGGT TATATGGTCAAGCGAATAAGCGCACACGGTGGATGCCTAGGCGGTCAGAGGCGA----TGAAGGACGTGGTAGCCTGCGAAAAGTGTCGGGGAGCTGGCAACAAGCTTTGATCCGGCAATATCCGAATGGGGAAACCCGG >AKIW1129_fasta.screen.Contig1 description field GAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAG-------------CGAATGACAGAGGAGCTTGCTCCTCTCGATTTAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTGCCTTATAGCTTGGGATAACTCCGGGAAACCGGGGCTAATAC-CGAATAATACTTTTGGACACATGTTCGAAAGTTGAAAGATGGTTCTGCTATCACTATAAGATGGACCCGCGCTGCATTAGCTAGTTGGTGAGGTAACGGCTCACCAAGG-CCACGATGCATAGCCGACCTGAGAGGGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGACGAAAGT------CTGATGGAGCAACGCCGCGTGAGTGAAGAAGGATTTCGGTTCGTAAAACTCTGTTGTAAGGGAAGAACAAGTACAGTAGTAACTGGCTGTACCTTGACGGTACCTTATTAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGTGGTCCTT--------TAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGGACTTGAGTGCAGAAGAGGATAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTGTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTGGGGGGTTTCCGCCCCTCAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCAT-------------TGACCACTGTAGAGATACAGTTTTCCCTTCGGGGACAACGGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGC----AACCCTTGTTCTTAGTTGCCATCATTTAGTTGGGCACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCT-GGGCTACACACGTGCTACAATGGACGGTACAAACGGTTGCCAACCCGCGAGGGGGAGCTAATCCGATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTAC-------ATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCG---------AAGTCGGTGAGGTAACCTTTTGGAGCCAGCCGCCGAAGGTGGGATAGATGATTGGGGTGAAGTCGTAACAAGGT """ expected_logfile_contents = \ """1\t23\t\t5\t100.00\t23 2\t2\tNo search results. """ expected_stringent_logfile_contents = \ """1\t23\tNo search results. """ pynast_test_template_fasta1 = """>128618 ----------------------------------------------------------------------------------------------------------GGAGAGTTT-GA--T-CC-T-G-GCTC-AG-GA-CGAA-C-GC--TGG-C--G-GC-G-TG--C----C-T--AATACA-T-GC-A-AGT-CGA-G-CGG---------A-C---CG-A----------------------------CGGG---AG----------------------------------------------------CTT-G----------------------------------------------------------------------------------CTC-TCT-------------------TA--G--GT--C--AG-C-GGCG-G--A--C-------------GGG-TGAGT-A--AC-AC-G-T-G-GG---TAA--C-CTGC--C-T--GT--AA-G------------------------------------------------------------------A-CT----GGG-AT-AA-CTC-------------------------C-G-G-----------------------GAA-A---CCG-GGG-CTAATAC---CG-G----AT-G---------------------------------C-TT-G-A--T--T----------------GAA---CC-------------------------------------------------------------------------------------------------------------------------G-CA-T--------------------------------------------------------------------------------------------------------------------------------------G-G-T--TCC---------------A--A--TC-A-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AAAA--G-G-T-GG-----C-----T----------------------------------------------------------------------------------------------------------------------TTCA--------------------------------------------------------------------------------------------------------------------------G--C--TA--C---C-A--------------C----T-T---A-CA-G---AT---G-G-----A-CCC-GCG--G-CGC--A------TT--A--G-CT-A----G---TTGG-T-G-AG-G-T----AAC-GG-C-T-C-ACCA--A-GG-C-G--A-CG-A------------TGC-G-T------AG-CC-G-A-CCT-G-AG----A--GG-GT--G-AT-C-GG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TA---GG-G-A-ATC-TTCCG-C-AA-T-GG--AC-GA-A----A-G-TC-T-GA-CG-GA-GCAA-CGCC-G-CG-T---G-A-G--T--GA-T-G--A--A-G-G-TT-----TT-CG---------G-A-T-C-G-T--A---AA-A-CTC--------TG-TT-G-T--T-AGG----GA-A--G---AACAAGT---ACCG-TT----C--G--AA-T---A----G-----GG-C-GGT-ACC-TT-GA-CG-GT-A-C-CT-A-AC-C---------AG-----------AAAGC-CAC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GTG-GCA-A-G-CG-TTGT-C-CGG-AA-TT-A--T-T--GGGC-GTA----AA-GCGC-GC--G-CA-G-G-C-G------------G--T-TT-C-T-T-AA----G-T-C-T---G-ATG-TG-A-AA-GC--CC-CCG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-G-G-GG-AG------G-GTC-A-T-T--------G--GA-A-A-C-T-G-GGG--A-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-G-----C-AG--AA-G-A------------G-GA-G-AG-T----GG--AATT-CCA-C-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TG-T-GGA--GG-A-AC-A-CC-AG--T--G--GC-GAA-G--G-C---G----A--C-T-CTCTG------G-TC-TG--------------------------------------------------------------TA-A-C-T--GA--CG-----CT-GA-GG--C-G-CGA--AA-G-C--------------G-TGGG-GAG-C-G-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-AG--TG-CT---------A-AG--T--G-T-TA-G-AG-G--G--T------------------------------------------------------------------------------------TT-CC----------------------------------------------------------------------------------------------------------------------------------------------G---C-C-C-TT--T-A-G-T-GC-T------GC--A----GC-AAA--CG-C-A-T--T--AA-GC--A----C-TCC-GCC-T-G-GG-GAG-TA---CGG-----T-C--G-C-A-A-GAC-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AGGTC-TT-G-AC-A-T-C--------------CTC-T-G-------------A-CA-A-C-C--CT--A-GA-G-A-T--A-G-G--G-C-T-T--C-C-----CC-------------------------------------T--TC-G------------------------------------------GG----G----G---CA-GAG---T--GA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-TT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TG--AT--C-TTAG--T-T-G-C-C---AG-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TCAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---C-A--CT---------------C-T-A-A-G-GT-G--AC-T-G-CCG--G-T------------------------------------G-A---CAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAAT-C---ATC-A-T-G-C-C-C-CTT----AT-G--AC-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-GCAG-A-A--C-AAA-GG-GC--------------------------------------------------------------------------------------------------A-G-C-G-A--A-GCCG-C--G---------------------------------------A-GG-C-T-----------A--A-G-CC---A----------A--TCC-C------A-C-AAATC-TG-T-T-C-T-CAG-TTC--------GGA-T-CGCAG-TC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-ACTGC-G-T-G-AA-G-CT-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AAT-ACGT-T-CCCGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-CG-AG-A--G---TTT-G-TA-AC-ACC--C-GAA------G--T-CGG-TG-A-G-G-T-AA-C-C-T-----------------------------------------------------------T-TT--------------------------------------------------------------------------------------------------------GG-A-G-C-C--A---GC-CGC--CG--AAG-G----T-GGG-AC-AGA------------------------TG--ATT-GGGG-TG-AAG-TCGTAACAA-GGTAG-CCGT-ATCGGAA-GGTG-CGGC-TGGATCACCTCCTTTCT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- >81187 ---------------------------------------------------------------------------------------------------------------AGAGTTTGAT-CC-T-G-GCTC-AG-AG-TGAA-C-GC--TGG-C--G-GC-A-TG--C----C-T--AACACA-T-GC-A-AGT-CGA-A-CG----------G-TAA-CA-G------------------------------GC-C-CG----------------------------------------------------CAA-G----------------------------------------------------------------------------------GG---T------------------G-CT--G--AC--G--AG-T-GG-C-GG-A--C-------------GGG-TGAGG-A--AC-AC-A-T-C-GG---A-A--T-TT-G--C-C-CAG--AC-G------------------------------------------------------------------T-GG----GGG-AT-AA-CGT-------------------------A-G-G-----------------------GAA-A---CTT-ACG-CTAA-TA---CC-G--C-AT-A----------C--------------------G-------------------------------------TC-C-----------------------------------------------------------------------------------------------------------------------T-AC-G--------------------------------------------------------------------------------------------------------------------------------------G-G-A---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-C-G-GG-----G--GA-T--C--------------------------------------------------------------------------------------------------------------------GCA-A----------------------------------------------------------------------------------------------------------------------A----CC-TC--G---C-G--------------C----G-G---T-TG-G---AT---G-A-----G-CCG-ATG--T-CGG--A------TT--A--G-CT-A----G---TTGG-C-G-GG-G-T----AAG-AG-C-C-C-ACCA--A-GG-C-G--A-CG-A------------TCC-G-T------AG-CT-G-G-TCT-G-AG----A--GG-AT--G-AT-C-AG-CCAC-A-TTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAAA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TG---GG-G-A-ATA-TTGGA-C-AA-T-GG--GG-GC-A----A-C-CC-T-GA-TC-CA-GCAA-TGCC-G-CG-T---G-T-G--T--GA-A-G--A--A-G-G-CC-----TT-CG---------G-G-T-T-G-T--A---AA-G-CAC--------TT-TT-A-T--C-AGG----AA-C--G---AA-ACGC---GCTT-GG----T--G--AA-T---A----G-----CA-G-GTG-AAC--T-GA-CG-GT-A-C-CT-G-AG-G---------AA-----------TAAGC-ACC-GG-C-TAA---C--T-TCGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GA-AG-GGT-GCA-A-G-CG-TTAC-T-CGG-AA-TT-A--C-T--GGGC-GTA----AA-GGGT-GC--G-TA-G-G-T-G------------G--T-TG-T-T-T-AA----G-T-C-T---G-CTG-TG-A-AA-GC--CC-CGG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-T-G-GG-AA-T----G-G-C-A-G-T--------G--GA-T-A-C-T-G-GGC--A-G-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-A-G-A-A-T-G-----C-GG--TA-G-A------------G-GG-T-AG-T----GG--AATT-CCC-G-GT--GT-A-GCA-GTGAAA-TG-CGT-AGAG-A-TC-G-GGA--GG-A-AC-A-CC-AG--T--G--GC-GAA-G----C---G----G--C-T-ACCTG------G-AC-CA--------------------------------------------------------------GC-A-T-T--GA--CA-----CT-CA-AG--C-A-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCC-T-AAA--C-GATG-TC--TA-CT---------A-GT--T--G-T-CG-G-GT-C--T---------------------------------------------------------------------------------------TA-AT--------------------------------------------------------------------------------------------------------------------------------------------------T-G-A-CT--T-G-G-T-AA-C------GC--A----GC-TAA--CG-C-G-T--G--AA-GT--A----G-ACC-GCC-T-G-GG-GAG-TA---CGG-----T-C--G-C-A-A-GAT-T--AAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-A-CCCG----C-A--C-A-A-GCG-GT-G--G--AT-GA-T--GT-GGA-TT-AATT-C-G-ATG-CAAC-G-CG-A-AA-A-A-CC-TT-A-CC-TACC--TT-G-AC-A-T-G--------------GCT-G-G-------------A-AT-C-C-C--GG--A-GA-G-A-T--T-T-G--G-G-A-G--T-GC----TC-------------------------------------G--AA-A------------------------------------------GA---GA----A---CC-AGT---A--CA---------------------------------------------------C-A-G-G-T-GCTG-CA-TGG-CT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TG--TC--A-TTAG--T-T-G-C-T---A--C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------G------------G----G---C-A--CT---------------C-T-A-A-T-GA-G--AC-T-G-CCG--G-T------------------------------------G-A---CAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAGT-C---CTC-A-T-G-G-C-C-CTT----AT-G--GG-T-A-GG-GC-TT-CAC-ACGTC-A--TA--CAATG---G-TACA-T-A--C-AGA--C-GC--------------------------------------------------------------------------------------------------C-G-C-C-A--A-CCCG-C--G---------------------------------------A-GG-G-G-----------G--A-G-CT---A----------A--TCG-C------A-G-AAAGT-GT-A-T-C-G-TAG-TCC--------GGA-T-TGTAG-TC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-ACTGC-A-T-G-AA-G-TT-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GTC-GC-G-GT-G-AAT-ACGT-T-CCCGGGTCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-TG-GG-A--G---CGG-G-TT-TT-ACC--A-GAA------G--T-AGG-TA-G-C-T-T-AA-C-C-------------------------------------------------------------G-CA-A------------------------------------------------------------------------------------------------------GG-A--GG-G--C---GC-TTA--CC--ACG-G----T-AGG-AT-TCG------------------------TG--ACT-GGGGTGAAGTCGTAACAAGGTAAC----C----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- >58677 --------------------------------------------------------------------------------------------------------------------------C--T-G-GCTC-AG-GA-CGAA-C-GC--TGG-C--G-GC-G-TG--C----C-T--AATACA-T-GC-A-AGT-CGA-G-CGG---------A-C---CA-A-------------------------------A-T-CG------------------------------------------------GAGCTTGCT----------------------------------------------------------------------------------CTGG--------------------T-TT--G--GT--C--AG-C-GG-C-GG-A--C-------------GGG-TGAGT-A--AC-AC-G-T-G-GG---CAA--C-CT-G--C-C-CGC--AA-G------------------------------------------------------------------A-CC----GGG-AT-AA-CTC-------------------------C-G-G-----------------------GAA-A---CCG-GAG-CTAA-TA---CC-G--G-AT-A----------A--------------------C-A--C-C-G--A--A-----------------GA---CC-G-----------------------------------------------------------------------------------------------------------------------C-AT-G--------------------------------------------------------------------------------------------------------------------------------------G---T--C-T---------------T--T-G-G-T-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-G-C-GG-----C-CTTTG-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GC-TG--T---C-A--------------C----T-T---G-CG-G---AT---G-G-----G-CCC-GCG--G-CGC--A------TT--A--G-CT-A----G---TTGG-T-G-AG-G-T----AAC-GG-C-T-C-ACCA--A-GG-C-G--A-CG-A------------TGC-G-T------AG-CC-G-G-CCT-G-AG----A--GG-GT--G-AC-C-GG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TA---GG-G-A-ATC-TTCCG-C-AA-T-GG--GC-GA-A----A-G-CC-T-GA-CG-GA-GCGA-CGCC-G-CG-T---G-A-G--C--GA-A-G--A--A-G-G-CC-----TT-CG---------G-G-T-C-G-T--A---AA-G-CTC--------TG-TT-G-T--G-AGG----GA-C--G---AAGGAGC---GCCG-TT----C--G--AA-G---A----G-----GG-C-GGC-GCG-GT-GA-CG-GT-A-C-CT-C-AC-G---------AG-----------AAAGC-CCC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GGG-GCG-A-G-CG-TTGT-C-CGG-AA-TT-A--T-T--GGGC-GTA----AA-GCGC-GC--G-CA-G-G-C-G------------G--T-CC-C-T-T-AA----G-T-C-T---G-ATG-TG-A-AA-GC--CC-ACG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-G-T-GG-AG-G----G-T-C-A-T-T--------G--GA-A-A-C-T-G-GGG--G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-G-----C-AG--GA-G-A------------G-GA-G-AG-C----GG--AATT-CCA-C-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TG-T-GGA--GG-A-AC-A-CC-AG--T--G--GC-GAA-G--G-C---G----G--C-T-CTCTG------G-CC-TG--------------------------------------------------------------CA-A-C-T--GA--CG-----CT-GA-GG--C-G-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-AG--TG-CT---------A-AG--T--G-T-TA-G-AG-G----------------------------------------------------------------------------------------GGTC-ACAC--------------------------------------------------------------------------------------------------------------------------------------------------C-C-TT--T-A-G-T-GC-T------GC--A----GC-TAA--CG-C-G-A--T--AA-GC--A----C-TCC-GCC-T-G-GG-GAG-TA---CGG-----C-C--G-C-A-A-GGC-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AGGTC-TT-G-AC-A-T-C--------------CCC-T-G-------------A----C-A-A--CC--CAAG-A-G-A--T-T-G--G-G-C-G--T-TC----CC-----------------------------------CCTT-CG-G------------------------------------------GG---GG----A---CA-GGG---T--GA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-TT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-CG--CC--T-CTAG--T-T-G-C-C---AG-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TCAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---C-A--CT---------------C-T-A-G-A-GG-G--AC-T-G-CCG--G-C------------------------------------G-A---CAA----------------------------------G-T-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAAT-C---ATC-A-T-G-C-C-C-CTT----AT-G--AC-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-GCGG-T-A--C-AAA-GG-GC--------------------------------------------------------------------------------------------------T-G-C-G-A--A-CCCG-C--G---------------------------------------A-GG-G-G-----------G--A-G-CG---A----------A--TCC-C------A-A-AAAGC-CG-C-T-C-T-CAG-TTC--------GGA-T-TGCAG-GC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-CCTGC-A-T-G-AA-G-CC-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AA-TACGT-T-CCCGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-CG-AG-A--G---CTT-G-CA-AC-ACC--C-GAA------G--T-CGG-TG-A-G-G-C-AA-C-C-C-----------------------------------------------------------G-CA-A---------------------------------------------------------------------------------------------------G--GG-A--GC-C--A---GC-CGC--CG--AAG-G----T-GGG-GC-AAG------------------------TG--ATT-GGGG-TG-AAG-TCGTAACAA-GGTAG-CCGT-ACCGGAA--GTG-CGGCTGGATCACCCTCCTT----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- >14308 -------------------------------------------------------------------------------------------------------------------------------------------------------TGG-C--G-GC-G-TG--C----C-T--AACACA-T-GC-A-AGT-CGC-G-CGA---------G-A---AA-G----------------------------CTGC-T--C----------------------------------------------------TTT-G----------------------------------------------------------------------------------AG--CAGT----------------T--A--G--TA--A--AG-C-GG-C-GG-A--C-------------GGG-TGAGT-A--AC-GC-G-T-G-AG---TAA--T-CT-A--C-C-TTT--AA-G------------------------------------------------------------------T-CT----GAT-AT-AA-CTT-------------------------C-T-C-----------------------GAA-A---GGG-AAG-CTAA-TT---TC-G--G-AT-A---------------------------------T-TA-T-G--C--T----------------GCC---TG-G-----------------------------------------------------------------------------------------------------------------------A-TA-A--------------------------------------------------------------------------------------------------------------------------------------C-C-A--G-G---------------C--T-G-C-A-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CAAA--G-G-C-GG-----C-----T-----------------------------------------------------------------------------------------------------------------------TTT--------------------------------------------------------------------------------------------------------------------------T--GC-CT--C---C-G--------------C----T-T---T-TA-G---AT---G-T-----G-CTC-GCG--T-CCC--A------TT--A--G-CT-T----G---TTGG-T-G-AG-A-T----AAC-AG-C-T-C-ACCA--A-GG-C-T--G-CG-A------------TGG-G-T------AG-CC-G-A-CCT-G-AG----A--GG-GT--G-AT-C-GG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-T-GC-A-G-TG---GG-G-A-ATC-TTTCG-C-AA-T-GA--GC-GC-A----A-G-CT-T-GA-CG-AA-GCGA-CGCC-G-CG-T---G-A-G--T--GA-T-G--A--A-G-G-CC-----TT-CG---------G-G-T-C-G-T--A---AA-G-CTC--------TG-TC-C-T--C-AGG----GA-A--G---AACATCT---TAGT-AG----T--G--AA-T--------A-----AC-T-GCT-AGGCTT-GA-CG-GT-A-C-CT-G-AG-A---------AG-----------AAAGC-TCC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GGG-GCA-A-G-CG-TTGT-C-CGG-AA-TC-A--T-T--GGGC-GTA----AA-GGGT-GC--G-CA-G-G-C-G------------G--T-CT-G-G-C-AA----G-T-C-A---A-GTG-TG-A-AA-TG--TA-TCG-G--------------------------------------------------------------------CT-T-AA-------------------------------------------------------------------------CT-G-A-TA-CA------C-TGC-G-C-T--------T--GA-A-A-C-T-G-TCA--G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-G-G-----C-AA--GA-G-A------------A-GA-G-AG-C----GG--AATT-CCT-A-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAT-A-TT-A-GGA--AG-A-AC-A-CC-AG--T--G--GC-GAA-A--G-C---G----G--C-T-CTCTG------G-CT-TG--------------------------------------------------------------AC-C-C-T--GA--CG-----CT-GA-GG--C-A-CGA--AA-G-C--------------T-AGGG-GAG-C-G-AACG--GG-ATTA-G-ATA-C-----CC-C-G-GTA-G-T----C-CT--G-G-CTG-T-AAA--C-GCTG-GA--TA-CT---------A-GG--T--G-T-TG-G--G-G--G--T------------------------------------------------------------------------------------TC-AA----------------------------------------------------------------------------------------------------------------------------------------------C---T-C-C-CT--C-A-G-T-GC-T------GC--A----GT-TAA--CG-C-G-T--T--AA-GT--A----T-CCC-GCC-T-G-GG-GAT-TA---CGA-----C-C--G-C-A-A-GGT-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-GCCT-G----C-A--C-A-A-GCG-GC-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-C-AG-A-A-CC-TT-A-CC-AGGGC-TT-G-AC-A-T-C------------CCGTGAC-T-------------A-TC-T-G-T--CA--A-CA-G-C-A--G-A-A--T-T-T-G---------GTCC------------------------------------T--TT-G------------------------------------------GA----T----C---AC-ACG-G-T--GA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-CT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-C-TA--TC--C-TTAG--T-T-G-C-C---AG-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TAAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---G-A--CT---------------C-T-A-G-G-GA-G--AC-T-G-CCA--G-T------------------------------------C-A---AAA----------------------------------A-C-T-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAGT-C---ATC-A-T-G-C-C-C-CTT----AT-G--CT-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-CCTG-T-A--C-AGA-GG-GC--------------------------------------------------------------------------------------------------T-G-C-T-A--T-ACCG-C--A---------------------------------------A-GG-T-T-----------T--A-G-CC---A----------A--T-C-C------T-C-AAAAC-AG-G-T-C-C-CAG-TTC--------GGA-T-TGCTG-GC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-CCTGC-A-T-G-AA-G-CT-GGAGT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-A-------AT--GCC-GC-G-GT-G-AAT-CCGT-T-CCCAGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-CC-CG-A--G---TTG-G-AT-GC-ACC--A-GAA------G--T-CG--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- >100011 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T--AACACA-T-GC-A-AGT-CGA-A-C-G-----------A---TA-A----------------------------CCTGG--AG----------------------------------------------------CT--G----------------------------------------------------------------------------------CTC-T-A-------------------GG-GA--AT--T--AG-T-GG-C-GA-A--C-------------GGA-GTGAG-T--AC-AC-G-T-G-AG---TAA--C-CT-G--C-C-CTT--GA-C------------------------------------------------------------------T-CT----GGG-AT-AA-CCT-------------------------C-C-G-----------------------GAA-A---CGG-AAG-CTAA------CC-G--G-AT-A---------------------------------T-GA-C-G--C--------------------AC---GGAG-----------------------------------------------------------------------------------------------------------------------G-CA-T-------------------------------------------------------------------------------------------------------------------------------------CT-C----CTG---------------T--G-C-G-T-G-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-A----------------------------------------------------------------------------------------------------------------------------------------ACT------------------------------------------------------------------------------------------------------------------------------------T---C-G--------------G----T-C---A-AG-G---AT---G-G-----A-CTC-GCG--G-CCT--A------TC--A--G-GT-A----G---TTGG-T-G-AG-G-T----AAC-GGCC-C---ACCA--A-GC-C----TACG-A------------CGG-G-T------A--CC-G-G-CCT-G-AG----A--GG-GT--G-AC-C-GG-CCAC-A-CTGGG--A-C-TG-A-TA-C-AC-G-G-CC-AGA-CTCC-TAC-G--G-G---G-GC-A-GC-ACGGTG---GG-G-A-ATA-TTGCA-C-AA-T-GG--GC-GA-A----A-G-CC-T-GA-TG-CA-GCA--CGCC-G-CG-T---A-G-G--G----------A--C-G-G-CC-----TT-CG---------G-G-T-T-G--------AA-C-CT---------TT-TT-A-T--T-AGG----GA-A--G---AAGC------------------------A-A---------------------------GT-GA-CG-GT-A-C-CT-G-TA------------A-----------AAAGC-ACC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCGG-----GG--TA-AT--AC---GT-AG-GGT-GCG-A-G-CG-TTGT-C-CGG-AA-TT-A--T-T--GGGC-GTA----AA-GAGC-TC--G-TA-G-G-C-G------------G--T-CT-G-T-C-GC----G-T-C-T---G-C-G-TGAG-AA-A---AC-CAG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-T-C-GG-GC-T----T-G-C-A-G-T--------G--GA-T-A-C-G-G-GCA--G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-A-G-A-G-T-------C-GG--TA-G-G------------G-GA-G-AA-T----GG--AATT-C---G-GT--GT---GCG-GTGGAA-TG-CGC-AGAT-A-TC-A-GGA--GG-A-CC---CC-GA--T--G--GC-GAA-T--G-C---A----G--T-T-CTCTG------G-C--CG-------------------------------------------------------------TA--A-C-T--GA--CA-----CT-GA-G---A-T-CGA--AA-G-C--------------G-TGGG-A---C-G-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AA---C-GTTG-CG--CT--T---------A-GA--T--G-T-GG-G-GA-C--C-------------------------------------------------------------------------------------ATTC-CACG------------------------------------------------------------------------------------------------------------------------------------------------G-T-T--T--C-C-G-T-GT-C------G---A----GC-TAA--CG-C-A-T--T--AA-TG--C----G-CCC-GCC-T-G-GG-GAG-TA---CGG-----C----G-C-A-A-GGC-T--AAA--CTC-AAG------------A-TTG-ACGGG-G-G-CCCG----C-A--C-A-C-GCG-AG-----------A-T--GC-GGA-TT-AATT-G-A-TCG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AAGGC-TT-G-AC-A-T-A------------C-ACG-A-G-------------A-TA---C-G-GGCCAGAAA-T-G-G----T----C-A-A-C----------TC---------------------------------------TTTGG------------------------------------------AC----------AC-TC-AGT---G--AA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-TT--GTC-GTC-A-GC-TC---G-TG-TC-A--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-C-TG--TG--G-TTAG--T-T-G-C-C---AG-C-A--C--G-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TAA------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TG---G----T-G------------G----G---A-A--CT---------------C-A-T-A-G-GA-G--AC-T-G-CC---G-G------------------------------------G-T---CAA----------------------------------C-T---G--G-A-GG----AGG-T--GGGG-A-TGAC-GTC--AAAT-A---ATC-A-T-G--CC-C-CTT----AT-G--TC-T-T-GG-GC-TT-CAC-GTATG-C--TA--CAATG---C-CGGT-A-A--C-AAA-GG-GC--------------------------------------------------------------------------------------------------T-G-C-A-A--T-ACCG-T--A---------------------------------------A-GG-T-G-----------G--A---CG---A----------A--TCC-C------A-A-AAA-C-CG-G-T-C-T-CAG-TTC--------GGA-T-TGAGG-TC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-ACCTC-A-T-G--A-G-TC-GGA-T-CG---TA--G-TA-AT-C-G-C----AGA-TC-A----A------AC--GCT--C-G-GT-G--AT-ACGT----CCCGGCCT-TGT-----CACACCG-CCC-GTC-----A---AG--TCA-TG-AA-A--G----TC-G-GA-AC-ACC--C-GA----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- """ input_seqs1_fasta = """>FAKE1 here is some desc.73602 tag1;tag2, tag3:tag4 AGGCGGCTACCTGGACCAACACTGACACTGAGGCACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCCTAAACGATGCGAACTGGATGTTGGGTGCAATTTGGCACGCAGTATCGAAGCTAACGCGTTAAGTTCGCCGCCTGGGGA GTACGGTCGCAAGACTTAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGTATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTGGTCTTGACATCCACGGAACTTTCCATAGATGGATTGGTGCCTTCGGGAACCGTGAGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCC TTGTCCTTAGTTGCCAGCACGTAATGGTGGGAACTCTAAGGAGACCGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAGTCATCATGGCCCTTAGGGGACCAGGGCTACACACGTACTACAATGGTAGGGACAGAGGGCTGCAAACCCGCGAGGGCAAGCCAATCCCAGAAACCCTATCTCAGTCCGGATTGGAGTTTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCAGATCAGCATTGCTGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTTTGTTGCACCAGAA GCAGGTAGCTTAACCTTCGGGAGGGCGCTCACGGTGTGGCCGATGACTGGGGTGAAGTCGTAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCTCCTTTTGAGCATGACGTCATCGTCCTGTCGGGCGTCCTCACAAATTACCTGCATTCAGAGATGCGTATCGGCACAGGCCGGTATGCGAAAGTCCCATCATGGGGCCTTAGCTCAGCTGGGAGAGCACCTGCTTTGCAAGCAGGGGGTCGTCGGTTCGATCCCGACAGGCTCCACCATTTGAGTGAAACGACTTTGGGTCTGTAGCTCAGGTGGTTAGAGCGCACCCCTGATAAGGGTGAGGTCGGTGGTTCGAGTCCTCCCAGACCCACCACTCTGAATGTAGTGCACACTTAAGAATTTATATGGCTCAGCGTTGAGGCTGAGACATGTTCTTTTATAACTTGTGACGTAGCGAGCGTTTGAGATATCTATCTAAACGTGTCGTTGAGGCTAAGGCGGGGACTTCGAGTCCCTAAATAATTGAGTCGTATGTTCGCGTTGGTGGCTTTGTACCCCACACAACACGGCGTATGGCCCCGAGGCAACTTGGGGT TATATGGTCAAGCGAATAAGCGCACACGGTGGATGCCTAGGCGGTCAGAGGCGATGAAGGACGTGGTAGCCTGCGAAAAGTGTCGGGGAGCTGGCAACAAGCTTTGATCCGGCAATATCCGAATGGGGAAACCCGG >AKIW1129_fasta.screen.Contig1 description field GAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGAATGACAGAGGAGCTTGCTCCTCTCGATTTAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTGCCTTATAGCTTGGGATAACTCCGGGAAACCGGGGCTAATACCGAATAATACTTTTGGACACATGTTCGAAAGTTGAAAGATGGTTCTGCTATCACTATAAGATGGACCCGCGCTGCATTAGCTAGTTGGTGAGGTAACGGCTCACCAAGGCCACGATGCATAGCCGACCTGAGAGGGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGTGAAGAAGGATTTCGGTTCGTAAAACTCTGTTGTAAGGGAAGAACAAGTACAGTAGTAACTGGCTGTACCTTGACGGTACCTTATTAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGTGGTCCTTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGGACTTGAGTGCAGAAGAGGATAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTGTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTGGGGGGTTTCCGCCCCTCAG TGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCATTGACCACTGTAGAGATACAGTTTTCCCTTCGGGGACAACGGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTTCTTAGTTGCCATCATTTAGTTGGGCACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAACGGTTGCCAACCCGCGAGGGGGAGCTAATCCGATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTTGGAGCCAGCCGCCGAAGGTGGGATAGATGATTGGGGTGAAGTCGTAACAAGGT >AKIW521_fasta.screen.Contig1 gagtttgatcatggctcagattgaacgctggcggcatgccttacacatgcaagtcgaacggcagcgcggggcaacctggcggcgagtggcgaacgggtgagtaatacatcggaacgtacccagaagtgggggataacgtagcgaaagttacgctaataccgcatacgttctacggaagaaagtgggggatcttcggacctcatgcttttggagcggccgatgtctgattagctagttggtgaggtaaaggctcaccaaggcgacgatcagtagctggtctgagaggacgaccagccacactgggactgagacacggcccagactcctacgggaggcagcagtggggaattttggacaatgggcgcaagcctgctccagcaatgccgcgtgagtgaagaagg ccttcgggttgtaaagctcttttgtcagggaagaaacggctgaggttaataccttcggctaatgacggtacctgaagaataagcgccggctaactacgtgccagcagccgcggtaatacgtagggtgcaagcgttaatcggaattactgggcgtaaagcgtgcgcaggcggttttgtaagtctgacgtgaaatccccgggctcaacctgggaattgcgttggagactgcaaggctagagtctggcagaggggggtagaattccacgtgtagcagtgaaatgcgtagagatgtggaggaacaccgatgggcgaaggcagccccctgggtcaagactgacgctcatgcacgaaagcgtggggagcaaacaggattagataccctggtagtccacgcc ctaaacgatgtctactagttgtcgggtcttaattgacttggtaacgcagctaacgcgtgaagtag accgcctggggagtacggtcacaagattaaaactcaaaggaattgacggggacccgcacaagcggtggatgatgtggattaattcgatgcaacgcgaaaaaccttacctacccttgacatgtcaggaatcctcgagagattgaggagtgcccgaaagggaacctgaacacaggtgctgcatggctgtcgtcagctcgtgtcgtgagatgttgggttaagtcccgcaacgagcgcaacccttgtcattagttgctacgaaagggcactctaatgagactgccggtgacaa accggaggaaggtgggga tgacgtcaagtcctcatggcccttatgggtagggcttcacacgtcatacaatggtacatacagagggccgccaacccgcgagggggagctaatcccagaaagtgtatcgtagtccggatcgcagtctgcaactcgactgcgtgaagttggaatcgctagtaatcgcggatcagcatgccgcggtgaatacgttcccgggtcttgtacacaccgcccgtcacaccatgggagcgggttttaccagaagtaggtagcttaaccgcaaggggggcgcttaccacggtaggattcgtgactggggtgaagtcgtaacaaggtaa >modified_AKIW1129_both_ends_extended CCGGAATTCCTTTTAAGAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGAATGACAGAGGAGCTTGCTCCTCTCGATTTAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTGCCTTATAGCTTGGGATAACTCCGGGAAACCGGGGCTAATACCGAATAATACTTTTGGACACATGTTCGAAAGTTGAAAGATGGTTCTGCTATCACTATAAGATGGACCCGCGCTGCATTAGCTAGTTGGTGAGGTAACGGCTCACCAAGGCCACGATGCATAGCCGACCTGAGAGGGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGTGAAGAAGGATTTCGGTTCGTAAAACTCTGTTGTAAGGGAAGAACAAGTACAGTAGTAACTGGCTGTACCTTGACGGTACCTTATTAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGTGGTCCTTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGGACTTGAGTGCAGAAGAGGATAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTGTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTGGGGGGTTTCCGCCCCTCAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCATTGACCACTGTAGAGATACAGTTTTCCCTTCGGGGACAACGGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTTCTTAGTTGCCATCATTTAGTTGGGCACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAACGGTTGCCAACCCGCGAGGGGGAGCTAATCCGATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTTGGAGCCAGCCGCCGAAGGTGGGATAGATGATTGGGGTGAAGTCGTAACAAGGTCCGGAATTCCTTTTAA >modified_AKIW1129_5_prime_end_extended CCGGAATTCCTTTTAAGAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGAATGACAGAGGAGCTTGCTCCTCTCGATTTAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTGCCTTATAGCTTGGGATAACTCCGGGAAACCGGGGCTAATACCGAATAATACTTTTGGACACATGTTCGAAAGTTGAAAGATGGTTCTGCTATCACTATAAGATGGACCCGCGCTGCATTAGCTAGTTGGTGAGGTAACGGCTCACCAAGGCCACGATGCATAGCCGACCTGAGAGGGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGTGAAGAAGGATTTCGGTTCGTAAAACTCTGTTGTAAGGGAAGAACAAGTACAGTAGTAACTGGCTGTACCTTGACGGTACCTTATTAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGTGGTCCTTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGGACTTGAGTGCAGAAGAGGATAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTGTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTGGGGGGTTTCCGCCCCTCAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCATTGACCACTGTAGAGATACAGTTTTCCCTTCGGGGACAACGGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTTCTTAGTTGCCATCATTTAGTTGGGCACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAACGGTTGCCAACCCGCGAGGGGGAGCTAATCCGATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTTGGAGCCAGCCGCCGAAGGTGGGATAGATGATTGGGGTGAAGTCGTAACAAGGT >modified_AKIW1129_3_prime_end_extended GAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGAATGACAGAGGAGCTTGCTCCTCTCGATTTAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTGCCTTATAGCTTGGGATAACTCCGGGAAACCGGGGCTAATACCGAATAATACTTTTGGACACATGTTCGAAAGTTGAAAGATGGTTCTGCTATCACTATAAGATGGACCCGCGCTGCATTAGCTAGTTGGTGAGGTAACGGCTCACCAAGGCCACGATGCATAGCCGACCTGAGAGGGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGTGAAGAAGGATTTCGGTTCGTAAAACTCTGTTGTAAGGGAAGAACAAGTACAGTAGTAACTGGCTGTACCTTGACGGTACCTTATTAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGTGGTCCTTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGGACTTGAGTGCAGAAGAGGATAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTGTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTGGGGGGTTTCCGCCCCTCAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCATTGACCACTGTAGAGATACAGTTTTCCCTTCGGGGACAACGGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTTCTTAGTTGCCATCATTTAGTTGGGCACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAACGGTTGCCAACCCGCGAGGGGGAGCTAATCCGATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTTGGAGCCAGCCGCCGAAGGTGGGATAGATGATTGGGGTGAAGTCGTAACAAGGTGATTACACCGGAATTCCTTTTAA""" input_seqs1_aligned_fasta = """>AKIW1129_fasta.screen.Contig1 description field 1..1507 -------------------------------------------------------------------------------------------------------------GAGTTT-GA--T-CA-T-G-GCTC-AG-GA-CGAA-C-GC--TGG-C--G-GC-G-TG--C----C-T--AATACA-T-GC-A-AGT-CGA-G-CGA---------A-T---GA-C---------------------------AGAGG---AG----------------------------------------------------CTT-G----------------------------------------------------------------------------------CTCCTCT-------------------CG--A--TT--T--AG-C-GGCG-G--A--C-------------GGG-TGAGT-A--AC-AC-G-T-G-GG---TAA--C-CTGC--C-T--TA--TA-G------------------------------------------------------------------C-TT----GGG-AT-AA-CTC-------------------------C-G-G-----------------------GAA-A---CCG-GGG-CTAATAC---CG-A----AT-A---------------------------------A-TA-C-T--T--T----------------TGG---AC-------------------------------------------------------------------------------------------------------------------------A-CA-T--------------------------------------------------------------------------------------------------------------------------------------G---T--TCG---------------A--A--AG-T-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-A-T-GG----------------------------------------------------------------------------------------------------------------------------------TTCT--------------------------------------------------------------------------------------------------------------------------G--C--TA--T---C-A--------------C----T-A---T-AA-G---AT---G-G-----A-CCC-GCG--C-TGC--A------TT--A--G-CT-A----G---TTGG-T-G-AG-G-T----AAC-GG-C-T-C-ACCA--A-GG-C-C--A-CG-A------------TGC-A-T------AG-CC-G-A-CCT-G-AG----A--GG-GT--G-AT-C-GG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TA---GG-G-A-ATC-TTCCA-C-AA-T-GG--AC-GA-A----A-G-TC-T-GA-TG-GA-GCAA-CGCC-G-CG-T---G-A-G--T--GA-A-G--A--A-G-G-AT-----TT-CG---------G-T-T-C-G-T--A---AA-A-CTC--------TG-TT-G-T--A-AGG----GA-A--G---AACAAGT---ACAG-TA----G--T--AA-C---T----G-----G--C-TGT-ACC-TT-GA-CG-GT-A-C-CT-T-AT-T---------AG-----------AAAGC-CAC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GTG-GCA-A-G-CG-TTGT-C-CGG-AA-TT-A--T-T--GGGC-GTA----AA-GCGC-GC--G-CA-G-G-T-G------------G--T-CC-T-T-T-AA----G-T-C-T---G-ATG-TG-A-AA-GC--CC-ACG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-G-T-GG-AG------G-GTC-A-T-T--------G--GA-A-A-C-T-G-GGG--G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-G-----C-AG--AA-G-A------------G-GA-T-AG-T----GG--AATT-CCA-A-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TT-T-GGA--GG-A-AC-A-CC-AG--T--G--GC-GAA-G--G-C---G----A--C-T-GTCTG------G-TC-TG--------------------------------------------------------------TA-A-C-T--GA--CA-----CT-GA-GG--C-G-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-AG--TG-CT---------A-AG--T--G-T-TG-G-GG-G--G--T------------------------------------------------------------------------------------TT-CC----------------------------------------------------------------------------------------------------------------------------------------------G---C-C-C-CT--C-A-G-T-GC-T------GC--A----GC-TAA--CG-C-A-T--T--AA-GC--A----C-TCC-GCC-T-G-GG-GAG-TA---CGG-----T-C--G-C-A-A-GAC-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AGGTC-TT-G-AC-A-TCC--------------CAT-T-G-------------A-CC-A-C-T--GT--A-GA-G-A-T--A-C-A--G-T-T-T--T-C-----CC-------------------------------------T--TC-G------------------------------------------GG----G----A--CAA-CGG---T--GA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-TT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TG--TT--C-TTAG--T-T-G-C-C---AT-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---C-A--CT---------------C-T-A-A-G-GA-G--AC-T-G-CCG--G-T------------------------------------G-A---CAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAAT-C---ATC-A-T-G-C-C-C-CTT----AT-G--AC-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-ACGG-T-A--C-AAA-CG-GT--------------------------------------------------------------------------------------------------T-G-C-C-A--A-CCCG-C--G---------------------------------------A-GG-G-G-----------G--A-G-CT---A----------A--TCC-G------A-T-AAAAC-CG-T-T-C-T-CAG-TTC--------GGA-T-TGTAG-GC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-CCTAC-A-T-G-AA-G-CC-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AAT-ACGT-T-CCCGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-CG-AG-A--G---TTT-G-TA-AC-ACC--C-GAA------G--T-CGG-TG-A-G-G-T-AA-C-C-T-----------------------------------------------------------T-TT--------------------------------------------------------------------------------------------------------GG-A-G-C-C--A---GC-CGC--CG--AAG-G----T-GGG-AT-AGA------------------------TG--ATT-GGGG-TG-AAG-TCGTAACAA-GGT--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- >AKIW521_fasta.screen.Contig1 1..1488 ----------------------------------------------------------------------------------------------------------------GAGTTTGAT-CA-T-G-GCTC-AG-AT-TGAA-C-GC--TGG-C--G-GC-A-TG--C----C-T--TACACA-T-GC-A-AGT-CGA-A-CG----------G-CAG-C---------------------------------GC-G-GG----------------------------------------------------GCA-A----------------------------------------------------------------------------------CC---T------------------G-GC--G--GC--G--AG-T-GG-C-GA-A--C-------------GGG-TGAGT-A--AT-AC-A-T-C-GG---A-A--C-GT-A--C-C-CAG--AA-G------------------------------------------------------------------T-GG----GGG-AT-AA-CGT-------------------------A-G-C-----------------------GAA-A---GTT-ACG-CTAA-TA---CC-G--C-AT-A----------C--------------------G-------------------------------------TT-C-----------------------------------------------------------------------------------------------------------------------T-AC-G--------------------------------------------------------------------------------------------------------------------------------------G-A-A---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-T-G-GG-----G--GA-T--C-------------------------------------------------------------------------------------------------------------------TTCG-G----------------------------------------------------------------------------------------------------------------------A----CC-TC--A---T-G--------------C----T-T---T-TG-G---AG---C-G-----G-CCG-ATG--T-CTG--A------TT--A--G-CT-A----G---TTGG-T-G-AG-G-T----AAA-GG-C-T-C-ACCA--A-GG-C-G--A-CG-A------------TCA-G-T------AG-CT-G-G-TCT-G-AG----A--GG-AC--G-AC-C-AG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TG---GG-G-A-ATT-TTGGA-C-AA-T-GG--GC-GC-A----A-G-CC-T-GC-TC-CA-GCAA-TGCC-G-CG-T---G-A-G--T--GA-A-G--A--A-G-G-CC-----TT-CG---------G-G-T-T-G-T--A---AA-G-CTC--------TT-TT-G-T--C-AGG----GA-A--G---AA-ACGG---CTGA-GG----T--T--AA-T---A----C-----CT-T-CGGCTAA--T-GA-CG-GT-A-C-CT-G-AA-G---------AA-----------TAAGC-GCC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GGT-GCA-A-G-CG-TTAA-T-CGG-AA-TT-A--C-T--GGGC-GTA----AA-GCGT-GC--G-CA-G-G-C-G------------G--T-TT-T-G-T-AA----G-T-C-T---G-ACG-TG-A-AA-TC--CC-CGG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-T-G-GG-AA-T----T-G-C-G-T-T--------G--GA-G-A-C-T-G-CAA--G-G-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-A-G-A-G-T-C-----T-GG--CA-G-A------------G-GG-G-GG-T----AG--AATT-CCA-C-GT--GT-A-GCA-GTGAAA-TG-CGT-AGAG-A-TG-T-GGA--GG-A-AC-A-CC-GA--T--G-GGC-GAA-G---GC---A----G--C-C-CCCTG------G-GT-CA--------------------------------------------------------------AG-A-C-T--GA--CG-----CT-CA-TG--C-A-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCC-T-AAA--C-GATG-TC--TA-CT---------A-GT--T--G-T-CG-G-GT-C--T---------------------------------------------------------------------------------------TA-AT--------------------------------------------------------------------------------------------------------------------------------------------------T-G-A-CT--T-G-G-T-AA-C------GC--A----GC-TAA--CG-C-G-T--G--AA-GT--A----G-ACC-GCC-T-G-GG-GAG-TA---CGG-----T-C--A-C-A-A-GAT-T--AAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-A-CCCG----C-A--C-A-A-GCG-GT-G--G--AT-GA-T--GT-GGA-TT-AATT-C-G-ATG-CAAC-G-CG-A-AA-A-A-CC-TT-A-CC-TACC-CTT-G-AC-A-T-G--------------TCA-G-G-------------A-AT-C-C-T--CG--A-GA-G-A-T--T-G-A--G-G-A-G--T-GC----CC-------------------------------------G--AA-A------------------------------------------GG---GA----A---CC-TGA---A--CA---------------------------------------------------C-A-G-G-T-GCTG-CA-TGG-CT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TG--TC--A-TTAG--T-T-G-C-T---A--C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------G------------G----G---C-A--CT---------------C-T-A-A-T-GA-G--AC-T-G-CCG--G-T------------------------------------G-A---CAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAGT-C---CTC-A-T-G-G-C-C-CTT----AT-G--GG-T-A-GG-GC-TT-CAC-ACGTC-A--TA--CAATG---G-TACA-T-A--C-AGA--GGGC--------------------------------------------------------------------------------------------------C-G-C-C-A--A-CCCG-C--G---------------------------------------A-GG-G-G-----------G--A-G-CT---A----------A--TCC-C------A-G-AAAGT-GT-A-T-C-G-TAG-TCC--------GGA-T-CGCAG-TC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-ACTGC-G-T-G-AA-G-TT-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AAT-ACGT-T-CCCGGGTCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-TG-GG-A--G---CGG-G-TT-TT-ACC--A-GAA------G--T-AGG-TA-G-C-T-T-AA-C-C-------------------------------------------------------------G-CA-A------------------------------------------------------------------------------------------------------GG-G--GG-G--C---GC-TTA--CC--ACG-G----T-AGG-AT-TCG------------------------TG--ACT-GGGGTGAAGTCGTAACAAGGTAA----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- >modified_AKIW1129_both_ends_extended 16..1523 ------------------------------------------------------------------------------------------------------------AGAGTTT-GA--T-CA-T-G-GCTC-AG-GA-CGAA-C-GC--TGG-C--G-GC-G-TG--C----C-T--AATACA-T-GC-A-AGT-CGA-G-CGA---------A-T---GA-C---------------------------AGAGG---AG----------------------------------------------------CTT-G----------------------------------------------------------------------------------CTCCTCT-------------------CG--A--TT--T--AG-C-GGCG-G--A--C-------------GGG-TGAGT-A--AC-AC-G-T-G-GG---TAA--C-CTGC--C-T--TA--TA-G------------------------------------------------------------------C-TT----GGG-AT-AA-CTC-------------------------C-G-G-----------------------GAA-A---CCG-GGG-CTAATAC---CG-A----AT-A---------------------------------A-TA-C-T--T--T----------------TGG---AC-------------------------------------------------------------------------------------------------------------------------A-CA-T--------------------------------------------------------------------------------------------------------------------------------------G---T--TCG---------------A--A--AG-T-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-A-T-GG----------------------------------------------------------------------------------------------------------------------------------TTCT--------------------------------------------------------------------------------------------------------------------------G--C--TA--T---C-A--------------C----T-A---T-AA-G---AT---G-G-----A-CCC-GCG--C-TGC--A------TT--A--G-CT-A----G---TTGG-T-G-AG-G-T----AAC-GG-C-T-C-ACCA--A-GG-C-C--A-CG-A------------TGC-A-T------AG-CC-G-A-CCT-G-AG----A--GG-GT--G-AT-C-GG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TA---GG-G-A-ATC-TTCCA-C-AA-T-GG--AC-GA-A----A-G-TC-T-GA-TG-GA-GCAA-CGCC-G-CG-T---G-A-G--T--GA-A-G--A--A-G-G-AT-----TT-CG---------G-T-T-C-G-T--A---AA-A-CTC--------TG-TT-G-T--A-AGG----GA-A--G---AACAAGT---ACAG-TA----G--T--AA-C---T----G-----G--C-TGT-ACC-TT-GA-CG-GT-A-C-CT-T-AT-T---------AG-----------AAAGC-CAC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GTG-GCA-A-G-CG-TTGT-C-CGG-AA-TT-A--T-T--GGGC-GTA----AA-GCGC-GC--G-CA-G-G-T-G------------G--T-CC-T-T-T-AA----G-T-C-T---G-ATG-TG-A-AA-GC--CC-ACG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-G-T-GG-AG------G-GTC-A-T-T--------G--GA-A-A-C-T-G-GGG--G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-G-----C-AG--AA-G-A------------G-GA-T-AG-T----GG--AATT-CCA-A-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TT-T-GGA--GG-A-AC-A-CC-AG--T--G--GC-GAA-G--G-C---G----A--C-T-GTCTG------G-TC-TG--------------------------------------------------------------TA-A-C-T--GA--CA-----CT-GA-GG--C-G-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-AG--TG-CT---------A-AG--T--G-T-TG-G-GG-G--G--T------------------------------------------------------------------------------------TT-CC----------------------------------------------------------------------------------------------------------------------------------------------G---C-C-C-CT--C-A-G-T-GC-T------GC--A----GC-TAA--CG-C-A-T--T--AA-GC--A----C-TCC-GCC-T-G-GG-GAG-TA---CGG-----T-C--G-C-A-A-GAC-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AGGTC-TT-G-AC-A-TCC--------------CAT-T-G-------------A-CC-A-C-T--GT--A-GA-G-A-T--A-C-A--G-T-T-T--T-C-----CC-------------------------------------T--TC-G------------------------------------------GG----G----A--CAA-CGG---T--GA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-TT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TG--TT--C-TTAG--T-T-G-C-C---AT-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---C-A--CT---------------C-T-A-A-G-GA-G--AC-T-G-CCG--G-T------------------------------------G-A---CAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAAT-C---ATC-A-T-G-C-C-C-CTT----AT-G--AC-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-ACGG-T-A--C-AAA-CG-GT--------------------------------------------------------------------------------------------------T-G-C-C-A--A-CCCG-C--G---------------------------------------A-GG-G-G-----------G--A-G-CT---A----------A--TCC-G------A-T-AAAAC-CG-T-T-C-T-CAG-TTC--------GGA-T-TGTAG-GC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-CCTAC-A-T-G-AA-G-CC-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AAT-ACGT-T-CCCGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-CG-AG-A--G---TTT-G-TA-AC-ACC--C-GAA------G--T-CGG-TG-A-G-G-T-AA-C-C-T-----------------------------------------------------------T-TT--------------------------------------------------------------------------------------------------------GG-A-G-C-C--A---GC-CGC--CG--AAG-G----T-GGG-AT-AGA------------------------TG--ATT-GGGG-TG-AAG-TCGTAACAA-GGT--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- >modified_AKIW1129_5_prime_end_extended 16..1523 ------------------------------------------------------------------------------------------------------------AGAGTTT-GA--T-CA-T-G-GCTC-AG-GA-CGAA-C-GC--TGG-C--G-GC-G-TG--C----C-T--AATACA-T-GC-A-AGT-CGA-G-CGA---------A-T---GA-C---------------------------AGAGG---AG----------------------------------------------------CTT-G----------------------------------------------------------------------------------CTCCTCT-------------------CG--A--TT--T--AG-C-GGCG-G--A--C-------------GGG-TGAGT-A--AC-AC-G-T-G-GG---TAA--C-CTGC--C-T--TA--TA-G------------------------------------------------------------------C-TT----GGG-AT-AA-CTC-------------------------C-G-G-----------------------GAA-A---CCG-GGG-CTAATAC---CG-A----AT-A---------------------------------A-TA-C-T--T--T----------------TGG---AC-------------------------------------------------------------------------------------------------------------------------A-CA-T--------------------------------------------------------------------------------------------------------------------------------------G---T--TCG---------------A--A--AG-T-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-A-T-GG----------------------------------------------------------------------------------------------------------------------------------TTCT--------------------------------------------------------------------------------------------------------------------------G--C--TA--T---C-A--------------C----T-A---T-AA-G---AT---G-G-----A-CCC-GCG--C-TGC--A------TT--A--G-CT-A----G---TTGG-T-G-AG-G-T----AAC-GG-C-T-C-ACCA--A-GG-C-C--A-CG-A------------TGC-A-T------AG-CC-G-A-CCT-G-AG----A--GG-GT--G-AT-C-GG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TA---GG-G-A-ATC-TTCCA-C-AA-T-GG--AC-GA-A----A-G-TC-T-GA-TG-GA-GCAA-CGCC-G-CG-T---G-A-G--T--GA-A-G--A--A-G-G-AT-----TT-CG---------G-T-T-C-G-T--A---AA-A-CTC--------TG-TT-G-T--A-AGG----GA-A--G---AACAAGT---ACAG-TA----G--T--AA-C---T----G-----G--C-TGT-ACC-TT-GA-CG-GT-A-C-CT-T-AT-T---------AG-----------AAAGC-CAC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GTG-GCA-A-G-CG-TTGT-C-CGG-AA-TT-A--T-T--GGGC-GTA----AA-GCGC-GC--G-CA-G-G-T-G------------G--T-CC-T-T-T-AA----G-T-C-T---G-ATG-TG-A-AA-GC--CC-ACG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-G-T-GG-AG------G-GTC-A-T-T--------G--GA-A-A-C-T-G-GGG--G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-G-----C-AG--AA-G-A------------G-GA-T-AG-T----GG--AATT-CCA-A-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TT-T-GGA--GG-A-AC-A-CC-AG--T--G--GC-GAA-G--G-C---G----A--C-T-GTCTG------G-TC-TG--------------------------------------------------------------TA-A-C-T--GA--CA-----CT-GA-GG--C-G-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-AG--TG-CT---------A-AG--T--G-T-TG-G-GG-G--G--T------------------------------------------------------------------------------------TT-CC----------------------------------------------------------------------------------------------------------------------------------------------G---C-C-C-CT--C-A-G-T-GC-T------GC--A----GC-TAA--CG-C-A-T--T--AA-GC--A----C-TCC-GCC-T-G-GG-GAG-TA---CGG-----T-C--G-C-A-A-GAC-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AGGTC-TT-G-AC-A-TCC--------------CAT-T-G-------------A-CC-A-C-T--GT--A-GA-G-A-T--A-C-A--G-T-T-T--T-C-----CC-------------------------------------T--TC-G------------------------------------------GG----G----A--CAA-CGG---T--GA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-TT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TG--TT--C-TTAG--T-T-G-C-C---AT-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---C-A--CT---------------C-T-A-A-G-GA-G--AC-T-G-CCG--G-T------------------------------------G-A---CAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAAT-C---ATC-A-T-G-C-C-C-CTT----AT-G--AC-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-ACGG-T-A--C-AAA-CG-GT--------------------------------------------------------------------------------------------------T-G-C-C-A--A-CCCG-C--G---------------------------------------A-GG-G-G-----------G--A-G-CT---A----------A--TCC-G------A-T-AAAAC-CG-T-T-C-T-CAG-TTC--------GGA-T-TGTAG-GC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-CCTAC-A-T-G-AA-G-CC-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AAT-ACGT-T-CCCGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-CG-AG-A--G---TTT-G-TA-AC-ACC--C-GAA------G--T-CGG-TG-A-G-G-T-AA-C-C-T-----------------------------------------------------------T-TT--------------------------------------------------------------------------------------------------------GG-A-G-C-C--A---GC-CGC--CG--AAG-G----T-GGG-AT-AGA------------------------TG--ATT-GGGG-TG-AAG-TCGTAACAA-GGT--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- >modified_AKIW1129_3_prime_end_extended 12..1520 -----------------------------------------------------------------------------------------------------------------------------T-G-GCTC-AG-GA-CGAA-C-GC--TGG-C--G-GC-G-TG--C----C-T--AATACA-T-GC-A-AGT-CGA-G-CGA---------A-T---GA-C-------------------------------A-G-AG------------------------------------------------GAGCTTGCT----------------------------------------------------------------------------------CCTC--------------------T-CG--A--TT--T--AG-C-GG-C-GG-A--C-------------GGG-TGAGT-A--AC-AC-G-T-G-GG---TAA--C-CT-G--C-C-TTA--TA-G------------------------------------------------------------------C-TT----GGG-AT-AA-CTC-------------------------C-G-G-----------------------GAA-A---CCG-GGG-CTAA-TA---CC-G--A-AT-A----------A--------------------T-A--C-T-T--T--T-----------------GG---AC-A-----------------------------------------------------------------------------------------------------------------------C-AT-G--------------------------------------------------------------------------------------------------------------------------------------T---T--C-G---------------A--A-A-G-T-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-A-T-GG--------TTCT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GC-TA--T---C-A--------------C----T-A---T-AA-G---AT---G-G-----A-CCC-GCG--C-TGC--A------TT--A--G-CT-A----G---TTGG-T-G-AG-G-T----AAC-GG-C-T-C-ACCA--A-GG-C-C--A-CG-A------------TGC-A-T------AG-CC-G-A-CCT-G-AG----A--GG-GT--G-AT-C-GG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TA---GG-G-A-ATC-TTCCA-C-AA-T-GG--AC-GA-A----A-G-TC-T-GA-TG-GA-GCAA-CGCC-G-CG-T---G-A-G--T--GA-A-G--A--A-G-G-AT-----TT-CG---------G-T-T-C-G-T--A---AA-A-CTC--------TG-TT-G-T--A-AGG----GA-A--G---AACAAGT---ACAG-TA----G--T--AA-C---T----G-----G--C-TGT-ACC-TT-GA-CG-GT-A-C-CT-T-AT-T---------AG-----------AAAGC-CAC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GTG-GCA-A-G-CG-TTGT-C-CGG-AA-TT-A--T-T--GGGC-GTA----AA-GCGC-GC--G-CA-G-G-T-G------------G--T-CC-T-T-T-AA----G-T-C-T---G-ATG-TG-A-AA-GC--CC-ACG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-G-T-GG-AG-G----G-T-C-A-T-T--------G--GA-A-A-C-T-G-GGG--G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-G-----C-AG--AA-G-A------------G-GA-T-AG-T----GG--AATT-CCA-A-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TT-T-GGA--GG-A-AC-A-CC-AG--T--G--GC-GAA-G--G-C---G----A--C-T-GTCTG------G-TC-TG--------------------------------------------------------------TA-A-C-T--GA--CA-----CT-GA-GG--C-G-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-AG--TG-CT---------A-AG--T--G-T-TG-G-GG-G----------------------------------------------------------------------------------------GTTT-CCGC--------------------------------------------------------------------------------------------------------------------------------------------------C-C-CT--C-A-G-T-GC-T------GC--A----GC-TAA--CG-C-A-T--T--AA-GC--A----C-TCC-GCC-T-G-GG-GAG-TA---CGG-----T-C--G-C-A-A-GAC-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AGGTC-TT-G-AC-A-T-C--------------CCATT-G-------------A----C-C-A--CT--GTAG-A-G-A--T------A-C-A-G--T-TT----TC-----------------------------------CCTT-CG-G------------------------------------------GG---AC----A----A-CGG---T--GA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-TT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TG--TT--C-TTAG--T-T-G-C-C---AT-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---C-A--CT---------------C-T-A-A-G-GA-G--AC-T-G-CCG--G-T------------------------------------G-A---CAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAAT-C---ATC-A-T-G-C-C-C-CTT----AT-G--AC-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-ACGG-T-A--C-AAA-CG-GT--------------------------------------------------------------------------------------------------T-G-C-C-A--A-CCCG-C--G---------------------------------------A-GG-G-G-----------G--A-G-CT---A----------A--TCC-G------A-T-AAAAC-CG-T-T-C-T-CAG-TTC--------GGA-T-TGTAG-GC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-CCTAC-A-T-G-AA-G-CC-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AA-TACGT-T-CCCGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-CG-AG-A--G---TTT-G-TA-AC-ACC--C-GAA------G--T-CGG-TG-A-G-G-T-AA-C-C---------------------------------------------------------------TT-T---------------------------------------------------------------------------------------------------T--GG-A--GC-C--A---GC-CGC--CG--AAG-G----T-GGG-AT-AGA------------------------TG--ATT-GGGG-TG-AAG-TCGTAACAA-GGTGA-TTAC-ACCGGAA------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ """ input_seqs1_fail_fasta = """>FAKE1 here is some desc.73602 tag1;tag2, tag3:tag4 AGGCGGCTACCTGGACCAACACTGACACTGAGGCACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCCTAAACGATGCGAACTGGATGTTGGGTGCAATTTGGCACGCAGTATCGAAGCTAACGCGTTAAGTTCGCCGCCTGGGGAGTACGGTCGCAAGACTTAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGTATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTGGTCTTGACATCCACGGAACTTTCCATAGATGGATTGGTGCCTTCGGGAACCGTGAGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTCCTTAGTTGCCAGCACGTAATGGTGGGAACTCTAAGGAGACCGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAGTCATCATGGCCCTTAGGGGACCAGGGCTACACACGTACTACAATGGTAGGGACAGAGGGCTGCAAACCCGCGAGGGCAAGCCAATCCCAGAAACCCTATCTCAGTCCGGATTGGAGTTTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCAGATCAGCATTGCTGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTTTGTTGCACCAGAAGCAGGTAGCTTAACCTTCGGGAGGGCGCTCACGGTGTGGCCGATGACTGGGGTGAAGTCGTAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCTCCTTTTGAGCATGACGTCATCGTCCTGTCGGGCGTCCTCACAAATTACCTGCATTCAGAGATGCGTATCGGCACAGGCCGGTATGCGAAAGTCCCATCATGGGGCCTTAGCTCAGCTGGGAGAGCACCTGCTTTGCAAGCAGGGGGTCGTCGGTTCGATCCCGACAGGCTCCACCATTTGAGTGAAACGACTTTGGGTCTGTAGCTCAGGTGGTTAGAGCGCACCCCTGATAAGGGTGAGGTCGGTGGTTCGAGTCCTCCCAGACCCACCACTCTGAATGTAGTGCACACTTAAGAATTTATATGGCTCAGCGTTGAGGCTGAGACATGTTCTTTTATAACTTGTGACGTAGCGAGCGTTTGAGATATCTATCTAAACGTGTCGTTGAGGCTAAGGCGGGGACTTCGAGTCCCTAAATAATTGAGTCGTATGTTCGCGTTGGTGGCTTTGTACCCCACACAACACGGCGTATGGCCCCGAGGCAACTTGGGGTTATATGGTCAAGCGAATAAGCGCACACGGTGGATGCCTAGGCGGTCAGAGGCGATGAAGGACGTGGTAGCCTGCGAAAAGTGTCGGGGAGCTGGCAACAAGCTTTGATCCGGCAATATCCGAATGGGGAAACCCGG """ input_seqs2_fasta = """>2855189 SLEpi20M_15561395 TACGAAAGATCCAAGCGTTATTCGAAATGATTGGGCNTAAANAGTTTGTAGGCGGTATTTGTACTCACTTCTAAAAAACTAAGATTATCTCTTAGTATGG """ pynast_test_template_fasta2 = """>26799 -----------------------------------------------------------------------------------------------------AAATGGAGAGGTTT-GA--T-CC-T-G-GCTC-AG-GA-TGAA-C-GC--TGG-C--G-AT-A-TG--C----T-T--AACACA-T-GC-A-AGT-CGA-A-CGA---------A-T---AT-T--------------------------AAGTTTTCTTAAA--------------------------------------------------TTT-G----------------------------------------------------------------------------------TAG-AAA-------------------TT--TA-AT--ATTAG-T-GG-C-GA-A--C-------------GGG-TGAGT-A--AC-GC-G-T-A-AG---A-A--T-CT-G--C-T-TTT--GG-G------------------------------------------------------------------T-AA----AGA-AT-AA-CAA-------------------------T-T-G-----------------------GAA-A---CGA-TTG-CTAA-TA---CT-T--T-AT-A----------G----------------------------------------------------------GC-T-----------------------------------------------------------------------------------------------------------------------G-AG-G--------------------------------------------------------------------------------------------------------------------------------------A-G-T---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TAAA--G-G-T--------------------------------------------------------------------------------------------------------------------------------------TTT-A-------------------------------------------------------------------------------------------------------------------------------T--T-TCC-G--------------C----C-C---A-GA-A---AT---G-A-----G-CTT-GCG--T-CTG--A------TT--A--G-CT-A----G---TTGG-T-A-AG-A-T----AAA-AG-C-T-T-ACCA--A-GG-C-A--A-TG-A------------TCA-G-T------AG-TT-G-G-TCT-G-AG----A--GG-AT--G-AT-C-AA-CCAC-A-CTGGG--A-C-TG-A-GA-T-AC-G-G-CCCAGA-CCTT-TAC-G--G-A-G-G-GC-A-GC-A-G-TG---AG-G-A-ATT-TTCCG-C-AA-T-GG--GC-GA-A----A-G-CC-T-GA-CG-GA-GCAA-TATC-G-CG-T---G-A-A--G--GA-T-G--A--C-G-G-CC-----TG-TG---------G-G-T-T-G-T--A---AA-C-TTC--------TT-TT-C-T--T-AAG----AA-A--G---A--------------------A--T--TC------------------------------T-GA-CG-GT-A-C-TT-A-AG-G---------AA-----------TAAGC-ATC-GG-C-TAA---C--T-CCGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GG-AG-GAT-GCA-A-G-CG-TTAT-C-CGA-AA-TT-A--T-T--GGGC-GTA----AA-GAGT-TT--G-TA-G-G-T-G------------G--T-TT-T-T-T-AA----G-T-C-T---A-CTG-TT-A-AA-TA--TC-AGA-G--------------------------------------------------------------------CT-T-AA-------------------------------------------------------------------------CT-T-T-GA-AC-A----A-G-C-A-G-T--------A-TGA-A-A-C-T-A-ATT--A-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-T-----T-GG--TA-G-A------------G-GC-A-GA-G----GG--AACT-CTC-G-AT--GT-A-GTG-GTGAAA-TA-CGT-AGAT-A-TC-G-GGG--GG-A-AC-A-CC-AG--T--A--GC-GAA-A--G-C---G----C--T-C-TGCTG------G-GC-CA--------------------------------------------------------------TA-A-C-T--GA--CA-----CT-GA-GA--A-A-CGA--AA-G-C--------------T-AGGG-GAG-C-A-AATA--GG-ATTA-G-ATA-C-----CC-T-A-GTA-G-T----C-CT--A-G-CTG-T-AAA--C-GATG-GA--TA-CT---------A-AG--T--A-T-TG-G-GC------------------------------------------------------------------------------------------TTTTTGAAG------------------------------------------------------------------------------------------------------------------------------------------------------TT--C-A-G-T-GT-T------GA--A----GC-TAA--CG-C-G-T--T--AA-GT--A----T-CCC-GCC-T-G-GG-GAG-TA---CGT-----T-C--G-C-A-A-GAA-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-ATG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AGGAA-TT-G-AC-A-T-A--------------CTC-G-T--------------TGGTT-T-T--TT--A-GA-A-A-T--A-A-A--A-A-A-------------C-------------------------------------T--GT-T------------------------------------------A--------------AA-GAG---A--TA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-CT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TG--TC--T-TTAG--T-T-G-T-T---AT-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TA---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------G-A-GA-G--AC-T-G-CCG--G-T------------------------------------G-A---TAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GAGG-A-TGAC-GTC--AAGT-C---AGC-A-T-G-C-C-C-CTT----AA-G--TC-C-T-GG-GC-GA-CAC-ACGTG-C--TA--CAATG---G-TATA-G-A--C-AAA-GG-GA--------------------------------------------------------------------------------------------------A-G-C-A-A--A-TCTG-C--G---------------------------------------A-AG-A-G-----------T--A-G-CA---A----------A--TCT-C------A---AAAAC-TATA-T-C-T-CAG-TTC--------GGA-T-TGCAG-GC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-CCTGC-A-T-G-AA-G-TC-GGAAT-CG-C-TA--G-TA-AT-C-G-C----TGG-TC-A-G-CC------AT--ACA-GC-G-GT-G-AAT-ATGT-T-CTCGGGCCT-TGTA----CACACCG-CCC-ATC-----A---CG--CTC-GA-GA-A--A---TTG-G-AA-AT-ACC--C-AAA------G--T-CAT-CA-T-T-C-T-AA-CCATATT---------------------------------------------------------T-TT-T---------------------------------------------------------------------------------------------------G---G-A--AG-A--T---AA-TGC--CA--AAG-G----T-AGA-GC-TAG------------------------TG--ACT-CAAG-CG-AAG-TTGTAACAA-GGTAA-CCGT-ACTGGAA-GGTG-CGGT-TGGATCACCTCCTTA---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- """ if __name__ == "__main__": main()