{{ super() }}
{% endblock %}
{% block sidebar1 %}{{ sidebar() }}{% endblock %}
{% block sidebar2 %}{% endblock %}
{# include the Google Analytics Tracker #}
{% block footer %}
{{ super() }}
{% endblock %}
PyNAST-1.2/doc/conf.py 0000664 0000000 0000000 00000014222 12047240761 0014502 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
#
# PyNAST documentation build configuration file, created by
# sphinx-quickstart on Mon Jan 25 11:42:17 2010.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys, os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.append(os.path.abspath('.'))
# -- General configuration -----------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = []
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix of source filenames.
source_suffix = '.rst'
# The encoding of source files.
#source_encoding = 'utf-8'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = u'PyNAST'
copyright = u'2010, Greg Caporaso'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '1.2'
# The full version, including alpha/beta/rc tags.
release = '1.2'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'
# List of documents that shouldn't be included in the build.
#unused_docs = []
# List of directories, relative to source directory, that shouldn't be searched
# for source files.
exclude_trees = ['_build']
# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
# -- Options for HTML output ---------------------------------------------------
# The theme to use for HTML and HTML Help pages. Major themes that come with
# Sphinx are currently 'default' and 'sphinxdoc'.
html_theme = 'default'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}
# If false, no module index is generated.
#html_use_modindex = False
# If false, no index is generated.
html_use_index = False
# If true, the index is split into individual pages for each letter.
#html_split_index = False
# If true, links to the reST sources are added to the pages.
html_show_sourcelink = False
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''
# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = ''
# Output file base name for HTML help builder.
htmlhelp_basename = 'PyNASTdoc'
# -- Options for LaTeX output --------------------------------------------------
# The paper size ('letter' or 'a4').
#latex_paper_size = 'letter'
# The font size ('10pt', '11pt' or '12pt').
#latex_font_size = '10pt'
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
('index', 'PyNAST.tex', u'PyNAST Documentation',
u'Greg Caporaso', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False
# Additional stuff for the LaTeX preamble.
#latex_preamble = ''
# Documents to append as an appendix to all manuals.
#latex_appendices = []
# If false, no module index is generated.
#latex_use_modindex = False
PyNAST-1.2/doc/index.rst 0000664 0000000 0000000 00000006726 12047240761 0015056 0 ustar 00root root 0000000 0000000 .. PyNAST documentation master file, created by
sphinx-quickstart on Mon Jan 25 11:42:17 2010.
Downloading PyNAST: Latest stable release
=========================================
You can download the latest `stable release of PyNAST here `_ and the `PyNAST OS X GUI (still PyNAST 1.0) here `_.
Downloading PyNAST: Development version
=======================================
If you want access to the latest-and-greatest features of PyNAST and can tolerate some instability we recommend that you check out the latest version from GitHub. You can do that with the following command: ::
git clone git://github.com/qiime/pynast.git PyNAST
Installing PyNAST
=================
`Notes on installing and using the PyNAST command line application. `_
`Notes on installing and using the PyNAST 1.0 Mac OS X GUI. `_
Stay up-to-date on PyNAST news
==============================
Subscribing to the PyNAST blog_ is the best way to keep up-to-date on news related to PyNAST. You can subscribe via RSS or e-mail on the front page of the blog. This is a very low traffic list, with currently around one e-mail per month or less.
The PyNAST blog is the primary means by which we will communicate information on bugs, new releases, and news to our users, so we highly recommend subscribing. We won't share subscriber information with anyone ever.
About PyNAST
============
PyNAST_ is a reimplementation of the NAST_ sequence aligner, which has become a popular tool for adding new 16s rDNA sequences to existing 16s rDNA alignments. This reimplementation is more flexible, faster, and easier to install and maintain than the original NAST implementation. PyNAST_ is built using the PyCogent Bioinformatics Toolkit.
The first versions of PyNAST (through PyNAST 1.0) were written to exactly match the results of the original NAST algorithm. Beginning with the post-PyNAST 1.0 development code, PyNAST no longer exactly matches the NAST output but is instead focused on getting better alignments. Users who wish to exactly match the results of NAST should download PyNAST 1.0.
Given a set of sequences and a template alignment, PyNAST_ will align the input sequences against the template alignment, and return a multiple sequence alignment which contains the same number of positions (or columns) as the template alignment. This facilitates the analysis of new sequences in the context of existing alignments, and additional data derived from existing alignments such as phylogenetic trees. Because any protein or nucleic acid sequences and template alignments can be provided, PyNAST_ is not limited to the analysis of 16s rDNA sequences.
PyNAST_ is presented in an open access `Bioinformatics Applications Note `_.
Citing PyNAST
=============
If you make use of PyNAST_ in published work, please cite:
**PyNAST: a flexible tool for aligning sequences to a template alignment.** J. Gregory Caporaso, Kyle Bittinger, Frederic D. Bushman, Todd Z. DeSantis, Gary L. Andersen, and Rob Knight. January 15, 2010, DOI 10.1093/bioinformatics/btp636. Bioinformatics 26: 266-267.
Need help?
==========
For PyNAST_ support, you can contact `Greg Caporaso `_.
.. _PyNAST: http://qiime.org/pynast
.. _blog: http://pynast.wordpress.com
.. _NAST: http://nar.oxfordjournals.org/cgi/content/full/34/suppl_2/W394
PyNAST-1.2/doc/install.rst 0000664 0000000 0000000 00000014630 12047240761 0015406 0 ustar 00root root 0000000 0000000 .. install_:
********************************************************
Installing and using the PyNAST command line application
********************************************************
Downloading PyNAST
==================
You can download the latest stable release of PyNAST `here `_.
You can download the latest development version of PyNAST with the following command: ::
git clone git://github.com/qiime/pynast.git PyNAST
Required software
=================
PyNAST_ is built on the PyCogent_ package, and uses uclust_. You must have PyCogent `1.5.3 `_ and uclust `v1.1.579 `_ or uclust `v1.2.22q `_ installed to run PyNAST_. You should first obtain these software packages, and install them according to the instructions provided by their authors.
Optional software
=================
If you'd like to perform pairwise alignments using BLAST_, MUSCLE_, MAFFT_, or ClustalW_, you must have those programs installed on your machine and in your system path. Currently tested versions are BLAST_ 2.2.22, MUSCLE_ v3.8.31, MAFFT v6.602b (**MAFFT v6.925b is known to NOT work with PyNAST**), and ClustalW 1.81 or 1.83. Note that PyNAST makes use of the legacy BLAST software, not BLAST+.
Installation steps
==================
#. Download PyCogent_ 1.5.3 (`src `_) and its dependencies, Python_ 2.6 or greater (but less than Python 3.0) and NumPy 1.3.0 or greater. PyNAST was tested with Python 2.7.1 and 2.7.2 and NumPy 1.5.1, though other versions may work as well.
#. Download and install uclust_. Binaries are available, or you can install from source (`uclust v1.1.579 binaries and src `_ or `uclust v1.2.22q binaries and src `_).
#. From your command terminal on an OS X or Linux system, change to the directory where you wish to install PyNAST_. You can either download `PyNAST 1.2 from here `_, or if you want the latest development version you can checkout the latest version of PyNAST_ from the GitHub repository with the command: ::
git clone git://github.com/qiime/pynast.git PyNAST
If you downloaded from GitHub, you will have a new folder in the current working directory called ``PyNAST``. If you downloaded PyNAST-1.2, after untar/unzipping ``PyNAST-1.2.tar.gz``, you will have a new directory named ``PyNAST-1.2``. **For consistency, all instructions below will refer to this directory as** ``PyNAST``. You may choose to rename ``PyNAST-1.2`` as ``PyNAST``.
#. Run setup.py. You may need to do this as root (see :ref:`customizing_your_installation` below if this is not an option, or if you'd like to install the PyNAST library code and/or scripts in non-default locations): ::
cd PyNAST
python setup.py install
#. Change to the PyNAST/tests directory: ::
cd tests
#. Run the test suite with the following command. All tests should pass, unless you don't have BLAST_, MUSCLE_, MAFFT_, and/or ClustalW_ installed. These are optional external software packages, and you will get one test failure per missing software package. You can ignore test failures which indicate that these programs cannot be found. ::
python all_tests.py
#. If all tests pass, you can get the usage information for the command line version of PyNAST_ with the following command anywhere on your system: ::
cd
pynast -h
.. _customizing_your_installation:
Customizing your installation
=============================
PyNAST consists of library code and a script. By default the script will be installed in ``/usr/local/bin``. This can be customized with the ``--install-scripts`` option: ::
python setup.py install --install-scripts=/home/pynast_user/bin/
You can similarly install the library code in an alternate location using the ``--install-purelib`` option: ::
python setup.py install --install-purelib=/home/pynast_user/lib/
A combination of these options is also possible: ::
python setup.py install --install-scripts=/home/pynast_user/bin/ --install-purelib=/home/pynast_user/lib/
For a complete discussion of customizations related to the setup.py script, `see this page `_.
If you specify an alternate directory for ``--install-purelib``, you'll need to ensure that python knows where to look for the pynast module. Following the example above, you would do this with the following commands: ::
echo "export PYTHONPATH=/home/pynast_user/lib/:$PYTHONPATH" >> /home/pynast_user/.bashrc
source /home/pynast_user/.bashrc
Similarly, if you specify an alternate directory for ``--install-scripts``, you'll need to ensure that the shell knows where to look for executable files. Following the example above, you would do this with the following commands: ::
echo "export PATH=/home/pynast_user/bin/:$PATH" >> /home/pynast_user/.bashrc
source /home/pynast_user/.bashrc
Using the PyNAST command line application
=========================================
After installing the PyNAST_ software as described above, you should download the sample candidate sequences and template alignment. You can then apply the PyNAST_ command line tool as follows: ::
pynast -i candidate_seqs_sample.fasta -t template_sample.fasta
This will result in three files being written to the current working directory: :file:`candidate_seqs_sample_pynast_aligned.fasta`, :file:`candidate_seqs_sample_pynast_log.txt`, and :file:`candidate_seqs_sample_pynast_fail.fasta`, which correspond to the alignment, the run log, and the list of sequences which failed to align, respectively.
To get usage information for the PyNAST_ command line application run: ::
pynast -h
.. _PyCogent: http://pycogent.sourceforge.net
.. _Python: http://www.python.org
.. _NumPy: http://numpy.scipy.org/
.. _MUSCLE: http://www.drive5.com/muscle/
.. _PyNAST: http://qiime.org/pynast
.. _ClustalW: http://www.ebi.ac.uk/Tools/clustalw2/index.html
.. _BLAST: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/LATEST/
.. _MAFFT: http://align.bmr.kyushu-u.ac.jp/mafft/online/server/
.. _uclust: http://www.drive5.com/uclust/
PyNAST-1.2/doc/install_gui.rst 0000664 0000000 0000000 00000004626 12047240761 0016256 0 ustar 00root root 0000000 0000000 .. Install GUI
*************************************
Installing and using the Mac OS X GUI
*************************************
Download the draft version of the `PyNAST OS X GUI here `_. Unzip the downloaded file to extract the PyNAST_ application. Depending on your system settings, the PyNAST_ application will either be called PyNAST or PyNAST.app. Ensure that your system meets the requirements listed below. If all requirements are met, double-click on the PyNAST_ application to launch PyNAST_. Note that YOU DO NOT NEED PyCogent_ or the PyNAST_ API/command line interface installed to use the PyNAST_ GUI.
Requirements for the PyNAST GUI
===============================
* An Intel Mac running OS X 10.5 (Leopard).
* Python_ 2.5 or greater (but less than Python 3.0) and NumPy_ 1.3.0 or greater.
* ``blastall``, ``formatdb``, and ``bl2seq`` installed in ``/usr/bin/``, ``/usr/local/bin/``, or ``$HOME/bin``. These are all part of NCBI's 'legacy' BLAST_ package, *NOT the BLAST+ package*. Versions 2.2.16 through 2.2.21 have been tested extensively with PyNAST_, but other versions should work. (Due to current limitations of the PyNAST_ GUI you need to have the required external software installed in one of these specific locations on your system.)
Optional for the PyNAST GUI
===========================
* MUSCLE_ installed in ``/usr/bin/``, ``/usr/local/bin/``, or ``$HOME/bin`` if you want to use that for pairwise aligning.
* ClustalW_ installed in ``/usr/bin/``, ``/usr/local/bin/``, or ``$HOME/bin`` if you want to use that for pairwise aligning.
Limitations in the draft release of the PyNAST GUI
==================================================
* Not all pairwise aligners are available. Missing options are pair_hmm and MAFFT.
* Users must place external executables in specific locations for PyNAST_ to find them, rather than PyNAST_ looking in user-defined locations. This will be addressed by adding a preferences box where users can define where these executables are stored.
* No help text within the application.
.. _PyCogent: http://pycogent.sourceforge.net
.. _Python: http://www.python.org
.. _NumPy: http://numpy.scipy.org/
.. _MUSCLE: http://www.drive5.com/muscle/
.. _PyNAST: http://qiime.org/pynast
.. _ClustalW: http://www.ebi.ac.uk/Tools/clustalw2/index.html
.. _BLAST: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/LATEST/
PyNAST-1.2/pynast/ 0000775 0000000 0000000 00000000000 12047240761 0013753 5 ustar 00root root 0000000 0000000 PyNAST-1.2/pynast/__init__.py 0000664 0000000 0000000 00000000535 12047240761 0016067 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
__author__ = "The PyNAST Development Team"
__copyright__ = "Copyright 2010, The QIIME Project"
__credits__ = ["Greg Caporaso", "Kyle Bittinger", "Rob Knight"]
__license__ = "GPL"
__version__ = "1.2"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
__status__ = "Development"
__all__ = ['logger','util']
PyNAST-1.2/pynast/logger.py 0000664 0000000 0000000 00000002355 12047240761 0015611 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
import logging
__author__ = "Kyle Bittinger"
__copyright__ = "Copyright 2010, The PyNAST Project"
__credits__ = ["Greg Caporaso", "Kyle Bittinger"]
__license__ = "GPL"
__version__ = "1.2"
__maintainer__ = "Kyle Bittinger"
__email__ = "kylebittinger@gmail.com"
__status__ = "Development"
class NastLogger(object):
    """Write a tab-delimited PyNAST run log, one record per line.

    On construction the column labels in ``__LABELS`` are written as the
    first record. If ``filename`` is None, records are discarded.

    Each instance owns its own ``logging`` logger. Previously every instance
    attached its handler to the single shared "PyNAST logger", so creating a
    second NastLogger caused its records to be written to the first
    instance's file as well.
    """

    __LABELS = [
        "candidate sequence ID",
        "candidate nucleotide count",
        "errors",
        "template ID",
        "BLAST percent identity to template",
        "candidate nucleotide count post-NAST",
    ]

    # Count of instances created so far; used to build unique logger names
    __instances = 0

    def __init__(self, filename=None):
        """Open the log (truncating ``filename`` if given) and write labels.

        filename: path of the log file to write, or None to discard records
        """
        self.Filename = filename
        self.__handler = None
        self.__logger = self.__init_logger()
        self.record(*self.__LABELS)

    def __init_logger(self):
        """Build and return the logger (and handler) owned by this instance."""
        if self.Filename is not None:
            handler = logging.FileHandler(self.Filename, mode='w')
        else:
            # Defined locally rather than using logging.NullHandler, which
            # is not available before Python 2.7
            class NullHandler(logging.Handler):
                def emit(self, record):
                    pass
            handler = NullHandler()

        # logging.getLogger() returns the same object for the same name, so
        # a distinct name per instance keeps handlers from accumulating on
        # one shared logger.
        NastLogger.__instances += 1
        logger = logging.getLogger(
            "PyNAST logger.%d" % NastLogger.__instances)
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)
        self.__handler = handler
        return logger

    def record(self, *args):
        """Write one record: the str() of each arg, joined by tabs.

        Does nothing once close() has been called.
        """
        if self.__handler is None:
            return
        log_entry = '\t'.join(map(str, args))
        self.__logger.info(log_entry)

    def close(self):
        """Detach and close this instance's handler, releasing the log file.

        Safe to call more than once.
        """
        handler = self.__handler
        if handler is not None:
            self.__logger.removeHandler(handler)
            handler.close()
            self.__handler = None
PyNAST-1.2/pynast/util.py 0000775 0000000 0000000 00000075006 12047240761 0015315 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
from __future__ import division
from os import system, remove, popen
from os.path import exists
from shutil import copy as copy_file
from glob import glob
from cogent import DNA, LoadSeqs, Sequence
from cogent.util.misc import remove_files
from cogent.core.alignment import SequenceCollection, DenseAlignment
from cogent.align.align import make_dna_scoring_dict, global_pairwise
from cogent.app.blast import blastn
from cogent.app.formatdb import build_blast_db_from_seqs, \
build_blast_db_from_fasta_path
from cogent.app.muscle_v38 import align_unaligned_seqs as muscle_align_unaligned_seqs
from cogent.app.mafft import align_unaligned_seqs as mafft_align_unaligned_seqs
from cogent.app.clustalw import align_unaligned_seqs as clustal_align_unaligned_seqs
from cogent.app.util import get_tmp_filename
from cogent.app.uclust import uclust_search_and_align_from_fasta_filepath
from cogent.parse.blast import BlastResult
from cogent.parse.fasta import MinimalFastaParser
from pynast.logger import NastLogger
__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2010, The PyNAST Project"
__credits__ = ["Greg Caporaso", "Kyle Bittinger", "Jai Ram Rideout"]
__license__ = "GPL"
__version__ = "1.2"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
__status__ = "Development"
""" PyNAST is a complete rewrite of the NAST algorithm written in python.
While PyNAST 1.0 strived to exactly match the results of the original
NAST algorithm, the later version (beginning with the post-1.0 development
code) no longer exactly matches the original NAST algorithm, hopefully
in favor of better results.
PyNAST depends on PyCogent, NumPy, Python, and uclust. The versions
used for development are:
PyCogent 1.5.3
NumPy 1.5.1
Python 2.7.1
uclust 1.1.579
The PyNAST algorithm works as follows:
(1) Using uclust, identify the closest match to a sequence in a template
alignment.
(2) Pairwise align the candidate sequence and template match identified
in step 1 (default uses the uclust result, but users can specify an
alternative pairwise aligner).
(3) Reintroduce gap pattern from the template sequence.
(4) Identify insertions which expand the template length. For each
'template-expanding' insertion, find the nearest gap character in the
candidate sequence and remove it.
(5) Return the aligned candidate sequence.
"""
class UnalignableSequenceError(Exception):
    """Raised when a candidate sequence cannot be fit to the template.

    Within this module it is raised by nearest_gap() when the aligned
    candidate contains no gap character that could be removed.
    """
    pass
def pair_hmm_align_unaligned_seqs(seqs,moltype,params={}):
    """ Pairwise align two seqs with cogent's global_pairwise

        seqs: data for exactly two sequences, in any form accepted by
         LoadSeqs
        moltype: moltype passed to LoadSeqs
        params: optional settings; recognised keys are 'gap_open'
         (default 5), 'gap_extend' (default 2) and 'score_matrix'
         (default: DNA scoring dict with match=1, transition=-1,
         transversion=-1)

        Raises ValueError unless exactly two sequences are provided.

        This needs to be moved to cogent.align.align
    """
    collection = LoadSeqs(data=seqs,moltype=moltype,aligned=False)
    try:
        first, second = collection.values()
    except ValueError:
        raise ValueError(
            "Pairwise aligning of seqs requires exactly two seqs.")

    gap_open = params.get('gap_open', 5)
    gap_extend = params.get('gap_extend', 2)

    # only build the default scoring dict when the caller didn't supply one
    if 'score_matrix' in params:
        score_matrix = params['score_matrix']
    else:
        score_matrix = make_dna_scoring_dict(
            match=1,transition=-1,transversion=-1)

    return global_pairwise(first,second,score_matrix,gap_open,gap_extend)
def _parse_first_bl2seq_hit(lines):
    """Return (query, subject) gapped strings for the first hit in bl2seq output."""
    query_parts = []
    subject_parts = []
    in_result = False
    for line in lines:
        if line.strip().startswith('Score'):
            # a second 'Score' line starts the next hit -- only the first
            # hit is used
            if in_result:
                break
            in_result = True
        if line.startswith('Query: '):
            query_parts.append(line.split()[2].upper())
        elif line.startswith('Sbjct: '):
            subject_parts.append(line.split()[2].upper())
    return ''.join(query_parts), ''.join(subject_parts)


def _restore_unaligned_termini(query_in, subject_in, query_aln, subject_aln):
    """Re-attach terminal bases missing from the aligned region, gap-padding both seqs to equal length."""
    q = query_aln.replace('-','')
    s = subject_aln.replace('-','')

    # locate the aligned region within each input sequence using (at most)
    # its first 100 ungapped characters
    q_start = query_in.index(q[:100])
    q_end = q_start + len(q)
    s_start = subject_in.index(s[:100])
    s_end = s_start + len(s)

    five_prime_bases_to_add = max(q_start,s_start)
    three_prime_bases_to_add = \
        max(len(query_in)-q_end, len(subject_in)-s_end)

    def pad(full_seq, aligned, start, end):
        leading_bases = full_seq[:start]
        trailing_bases = full_seq[end:]
        return ''.join([
            '-'*(five_prime_bases_to_add-len(leading_bases)),
            leading_bases,
            aligned,
            trailing_bases,
            '-'*(three_prime_bases_to_add-len(trailing_bases))])

    return (pad(query_in, query_aln, q_start, q_end),
            pad(subject_in, subject_aln, s_start, s_end))


def blast_align_unaligned_seqs(seqs,moltype,params={}):
    """ Pairwise align two seqs using bl2seq

        seqs: data for exactly two sequences, in any form accepted by
         LoadSeqs
        moltype: moltype passed to LoadSeqs for input and result
        params: accepted for consistency with the other pairwise
         aligners; not used by this function

        Returns the result of LoadSeqs on the two aligned sequences.

        Raises ValueError unless exactly two sequences are provided, and
         RuntimeError if bl2seq exits with a non-zero status.

        Changes from the previous version:
         * the number of sequences is checked before the ids are indexed,
           so too few sequences gives the intended ValueError rather than
           an IndexError
         * temporary files are removed even when bl2seq fails, and the
           bl2seq output file is closed after reading
         * the aligned sequences are always returned as joined strings
           (previously they were left as lists of fragments when no
           terminal bases had to be restored)

        This needs to be moved to the blast application controller.
    """
    seqs = dict(LoadSeqs(data=seqs,moltype=moltype,aligned=False).items())
    seq_ids = list(seqs.keys())
    if len(seq_ids) != 2:
        raise ValueError(
            "Pairwise aligning of seqs with blast requires exactly two seqs.")
    query_id, subject_id = seq_ids

    in_filepath1 = get_tmp_filename(tmp_dir='/tmp/',
        prefix='bl2seq_input1_',suffix='.fasta')
    in_filepath2 = get_tmp_filename(tmp_dir='/tmp/',
        prefix='bl2seq_input2_',suffix='.fasta')
    out_filepath = get_tmp_filename(tmp_dir='/tmp/',
        prefix='bl2seq_output_',suffix='.fasta')

    try:
        for seq_id, in_filepath in zip(seq_ids, (in_filepath1,in_filepath2)):
            with open(in_filepath,'w') as f:
                f.write('>%s\n' % seq_id)
                f.write(str(seqs[seq_id]))
                f.write('\n')

        # Note: -S 1 indicates that we don't want to blast both
        # orientations, as that would be different behavior than the other
        # pairwise aligners.
        bl2seq_res = system(
            'bl2seq -i %s -j %s -o %s -F F -S 1 -q -1 -p blastn -VT' %
            (in_filepath1,in_filepath2,out_filepath))
        if bl2seq_res != 0:
            raise RuntimeError("bl2seq failed:\n %s" % bl2seq_res)

        with open(out_filepath) as blast_res:
            query_aln, subject_aln = _parse_first_bl2seq_hit(blast_res)
    finally:
        # clean up on success and on failure alike
        for filepath in (in_filepath1, in_filepath2, out_filepath):
            if exists(filepath):
                remove(filepath)

    # reintroduce terminal characters which were not aligned
    query_seq, subject_seq = _restore_unaligned_termini(
        str(seqs[query_id]), str(seqs[subject_id]), query_aln, subject_aln)

    result = [(query_id,query_seq),
              (subject_id,subject_seq)]
    return LoadSeqs(data=result,moltype=moltype)
def align_two_seqs(template, candidate,
    align_unaligned_seqs_f=muscle_align_unaligned_seqs,
    params={},moltype=DNA):
    """ Pairwise align template and candidate with a pluggable aligner

        template: the template sequence (converted with str())
        candidate: the candidate sequence (converted with str())
        align_unaligned_seqs_f: aligner function, called as
         align_unaligned_seqs_f(seqs,moltype,params=params)
        params: params handed through to align_unaligned_seqs_f
        moltype: moltype handed through to align_unaligned_seqs_f

        Returns a tuple (gapped template, gapped candidate).
    """
    # name the two sequences so they can be pulled back out of the result
    named_seqs = [('template',str(template)),
                  ('candidate',str(candidate))]

    alignment = align_unaligned_seqs_f(named_seqs,moltype,params=params)

    aligned_template = alignment.getGappedSeq('template')
    aligned_candidate = alignment.getGappedSeq('candidate')
    return aligned_template, aligned_candidate
def reintroduce_template_spacing(template,
    pw_aligned_template,pw_aligned_candidate):
    """ Restore the template's gap pattern in a pairwise alignment

        template: the template sequence as gapped in the template alignment
        pw_aligned_template: the template as gapped by the pairwise aligner
        pw_aligned_candidate: the candidate as gapped by the pairwise aligner

        Returns (template, candidate, new_gaps). new_gaps lists the
         positions, in the returned sequences, of gap characters which the
         pairwise alignment added but which are absent from the template
         alignment.
    """
    # Simple case: the pairwise aligner reproduced the template spacing
    if template == pw_aligned_template:
        return (pw_aligned_template, pw_aligned_candidate,[])

    # gap maps relate ungapped sequence positions to alignment positions
    orig_seq_to_aln = template.gapMaps()[0]
    pw_seq_to_aln, pw_aln_to_seq = pw_aligned_template.gapMaps()

    # gaps present in the pairwise alignment but not in the template
    extra_pw_gaps = []
    # running count of gap characters put back so far -- needed to convert
    # pairwise alignment positions into positions in the output
    gaps_restored = 0

    template_chars = list(pw_aligned_template)
    candidate_chars = list(pw_aligned_candidate)

    for aln_pos in range(len(pw_aligned_template)):
        try:
            seq_pos = pw_aln_to_seq[aln_pos]
        except KeyError:
            # alignment position holds a gap: nothing to compare here
            continue

        next_seq_pos = seq_pos + 1
        try:
            # gaps following this character in the template alignment
            gaps_in_template = \
                orig_seq_to_aln[next_seq_pos] - orig_seq_to_aln[seq_pos] - 1
        except KeyError:
            # ran off the end of the sequence
            break

        # gaps following this character in the pairwise alignment
        gaps_in_pw = pw_seq_to_aln[next_seq_pos] - aln_pos - 1

        missing_gaps = gaps_in_template - gaps_in_pw
        # output position immediately after the current character
        out_pos = aln_pos + 1 + gaps_restored

        if missing_gaps > 0:
            # template alignment has more gaps here: put them back
            filler = ['-'] * missing_gaps
            template_chars[out_pos:out_pos] = filler
            candidate_chars[out_pos:out_pos] = filler
            gaps_restored += missing_gaps
        elif missing_gaps < 0:
            # pairwise alignment introduced new gaps: remember where they
            # are (in output coordinates) so they can be dealt with later
            extra_pw_gaps.extend(range(out_pos, out_pos - missing_gaps))

    restored_template = DNA.makeSequence(''.join(template_chars))
    restored_candidate = DNA.makeSequence(''.join(candidate_chars))
    return (restored_template, restored_candidate, extra_pw_gaps)
def nearest_gap(seq,pos):
    """ Returns the position of the nearest gap to pos in seq

        seq: the sequence to search, as a string or a list of characters
        pos: the (non-negative) position to search from

        If seq[pos] is itself a gap, pos is returned. When the nearest 5'
         and 3' gaps are equally distant, the 5' one is returned (which is
         what original NAST does).

        Raises IndexError if pos is negative, and UnalignableSequenceError
         if seq contains no gap characters.
    """
    # Catch negative sequence positions
    if pos < 0:
        raise IndexError("Sequence positions cannot be negative: %d" % pos)

    # If pos contains a gap, that's the closest gap
    if seq[pos] == '-':
        return pos

    # (distance, index) of the nearest gap in the 5' and 3' directions
    choices = []

    # find the nearest gap 5' of pos
    five_prime_index = ''.join(seq[:pos]).rfind('-')
    if five_prime_index != -1:
        choices.append((pos - five_prime_index, five_prime_index))

    # find the nearest gap 3' of pos
    three_prime_offset = ''.join(seq[pos:]).find('-')
    if three_prime_offset != -1:
        choices.append((three_prime_offset, pos + three_prime_offset))

    # error if there are no gaps in the sequence
    if not choices:
        raise UnalignableSequenceError(
            "Can't adjust alignment because there are too few gaps to "+
            "remove in the aligned candidate to reduce to the length of "+
            "the template alignment (i.e., candidate adds too many insertions "+
            "during pairwise alignment).")

    # tuples compare by distance first, then by index, so a tie in distance
    # resolves to the smaller index, i.e. the 5' gap
    return min(choices)[1]
def adjust_alignment(template,candidate,new_gaps):
    """adjust template/candidate aln to remove gaps added by pairwise alignment

        This step adjusts the alignment to reduce the length back to the
         template alignment length by introducing local misalignments to
         remove gap characters that are present in the pairwise alignment
         but not in the template alignment.

        template: the aligned template sequence
        candidate: the aligned candidate sequence
        new_gaps: list of positions of the gap characters to remove from
         the template; this list is no longer reversed in place, so the
         caller's list is left unmodified

        Returns a tuple (adjusted template, adjusted candidate).
    """
    template_l = list(template)
    candidate_l = list(candidate)
    # Visit the positions in reverse order (as before), but through a
    # reversed view rather than new_gaps.reverse(), which reordered the
    # caller's list as a side effect.
    for pos in reversed(new_gaps):
        del template_l[pos]
        # remove the gap in the candidate that is closest to pos
        del candidate_l[nearest_gap(candidate_l,pos)]

    return (DNA.makeSequence(''.join(template_l)),
            DNA.makeSequence(''.join(candidate_l)))
def introduce_terminal_gaps(template,aligned_template,aligned_candidate):
    """ introduce terminal gaps from template into the aligned candidate seq

        template: the template sequence as it appears in the template
         alignment
        aligned_template: the template after pairwise alignment
        aligned_candidate: the candidate after pairwise alignment; must
         have a Name attribute

        Returns a DNA sequence object, named like aligned_candidate, made of
         aligned_candidate padded on each end with the terminal gaps that
         template has in excess of aligned_template.
    """
    def count_leading_gaps(chars):
        # number of consecutive gap characters at the start of chars
        count = 0
        for char in chars:
            if char != '-':
                break
            count += 1
        return count

    # gaps already present at an end of the pairwise aligned template don't
    # need to be added again, so only the difference is introduced
    five_prime_gaps_to_add = \
     count_leading_gaps(template) - count_leading_gaps(aligned_template)
    three_prime_gaps_to_add = \
     count_leading_gaps(reversed(template)) - \
     count_leading_gaps(reversed(aligned_template))

    # multiplying a string by a negative count yields '', so no padding is
    # added when aligned_template has more terminal gaps than template
    padded_candidate = '-'*five_prime_gaps_to_add + \
     str(aligned_candidate) + \
     '-'*three_prime_gaps_to_add

    return DNA.makeSequence(padded_candidate,Name=aligned_candidate.Name)
def remove_template_terminal_gaps(candidate,template):
    """Remove template terminal gaps and corresponding bases in candidate

        candidate, template: aligned sequences of equal length; candidate
         must provide degap() and a Name

        Returns (candidate, template), both trimmed to the span between the
         first and last non-gap template positions. The returned candidate's
         name has a 'start..end' range appended, separated from the name by
         ':' if the name ends with 'RC' and by a space otherwise.
         Zero-length input is returned unchanged.

        Raises ValueError if the sequences differ in length.
    """
    template_length = len(template)
    candidate_length = len(candidate)
    if template_length != candidate_length:
        raise ValueError(
         "Sequences must be aligned, but their "+
         "lengths aren't equal. %d != %d" % (candidate_length,template_length))

    # nothing to trim
    if template_length == 0:
        return candidate, template

    # number of bases in the candidate, taken before any trimming
    degapped_candidate_len = len(candidate.degap())

    candidate = DNA.makeSequence(candidate)
    template = DNA.makeSequence(template)

    # locate the span of the template between its terminal gaps
    gap_flags = template.gapVector()
    first_non_gap = gap_flags.index(False)
    num_three_prime_gaps = gap_flags[::-1].index(False)
    last_non_gap = len(gap_flags) - num_three_prime_gaps

    candidate = candidate[first_non_gap:last_non_gap]
    template = template[first_non_gap:last_non_gap]

    # Construct the candidate name, which will include the range of bases
    # from the original sequence
    candidate_start_pos = first_non_gap + 1
    candidate_end_pos = degapped_candidate_len - num_three_prime_gaps
    original_name = candidate.Name
    name_delimiter = ':' if original_name.endswith('RC') else ' '
    ranged_name = '%s%s%d..%d' % (original_name,
                                  name_delimiter,
                                  candidate_start_pos,
                                  candidate_end_pos)

    return DNA.makeSequence(candidate,Name=ranged_name), template
def depreciation_warning(d):
    """Print a warning listing the unsupported or deprecated options in d

        d: dict keyed by option name (e.g., the **kwargs collected by one of
         the pynast functions). Nothing is printed if d is empty.
    """
    if not d:
        return
    option_names = ' '.join(d.keys())
    message = "Unsupported or depreciated options "+\
     "passed to pynast: %s\n" % option_names +\
     " blast_db, max_e_value, and addl_blast_params are depreciated " +\
     "and will be removed in PyNAST 1.2."
    print(message)
def pynast_seq(candidate_sequence, template_alignment,
    max_hits=30, min_pct=75.0, min_len=1000, align_unaligned_seqs_f=None,
    **kwargs):
    """ Apply PyNAST to a single sequence

        Returns a (template sequence id, aligned candidate sequence) tuple,
         and raises UnalignableSequenceError (carrying the logged error
         message) if the sequence could not be aligned.

        candidate_sequence
            a single DNA sequence object
        template_alignment
            a PyCogent alignment object containing the template alignment
            or a fasta filepath
        max_hits
            Maximum number of uclust hits to return
        min_pct
            minimum % identity for best database match
        min_len
            minimum length of match for alignment
        align_unaligned_seqs_f
            Function to align sequences. Must be of the form:
             align_unaligned_seqs(seqs, moltype, params=None)
             see cogent.app.muscle_v38.align_unaligned_seqs
    """
    depreciation_warning(kwargs)

    class SingleSeqLogger(object):
        """ Stand-in logger that keeps the fields of the last record() call """
        def setUp(self):
            self.Data = None
        def record(self,*args):
            self.Data = tuple(args)

    result_logger = SingleSeqLogger()
    single_candidate = [(candidate_sequence.Name,str(candidate_sequence))]

    # Materialize the whole iterator (instead of pulling just the first
    # item) so that ipynast_seqs runs through to its clean-up code
    results = list(ipynast_seqs(single_candidate,
     template_alignment, max_hits=max_hits, min_pct=min_pct, min_len=min_len,
     align_unaligned_seqs_f=align_unaligned_seqs_f,
     log_fp=None, logger=result_logger))
    aligned_seq, exit_status = results[0]

    if exit_status != 0:
        # third logged field is the error message
        raise UnalignableSequenceError(result_logger.Data[2])
    # fourth logged field is the id of the best template match
    return result_logger.Data[3], aligned_seq
def ipynast_seqs(candidate_sequences, template_alignment,
    max_hits=30, min_pct=75.0, min_len=1000, align_unaligned_seqs_f=None,
    log_fp=None, logger=None,**kwargs):
    """Iterator that yields results of pynast on candidate_sequences

        This function yields the sequence and exit status of the alignment step,
         as (sequence, exit status) tuples.

        Status values can be:
           0 : indicates a successful alignment, in which case the sequence
                will be aligned
           1 : indicates unsuccessful sequence search, in which case the
                sequence will be unaligned
           2 : indicates alignment did not meet minimum requirements, in which
                case the sequence will be unaligned

        All sequences are returned as DNA sequence objects.

        The temporary fasta files written by this function are removed when
         the iterator is exhausted, closed, or terminated by an exception.

        Raises ValueError if a candidate sequence contains gap characters,
         and IOError if a template alignment filepath cannot be opened.

        candidate_sequences
            an iterable object (e.g., a list) containing tuples of
            (seq_id, sequence) pairs (e.g., as returned by MinimalFastaParser)
            or a fasta filepath
        template_alignment
            a PyCogent alignment object containing the template alignment
            or a fasta filepath
        max_hits
            Maximum number of uclust hits to return
            (NOTE: not currently forwarded to the uclust search)
        min_pct
            minimum % identity for best database match
        min_len
            minimum length of match for alignment
        align_unaligned_seqs_f
            Function to align sequences. Must be of the form:
             align_unaligned_seqs(seqs, moltype, params=None)
             see cogent.app.muscle_v38.align_unaligned_seqs
        log_fp
            Optional path to log file
        logger
            Optional NastLogger object, takes precedence over log_fp
    """
    depreciation_warning(kwargs)

    files_to_remove = []
    pw_alignment_iterator = None
    candidate_reader_f = None

    # Everything runs inside try/finally so that the temporary files are
    # removed even if the caller abandons the iterator early or an error is
    # raised while aligning.
    try:
        candidate_source_f = None
        if isinstance(candidate_sequences,str):
            # filepath provided for candidate sequences
            candidate_source_f = open(candidate_sequences)
            candidate_sequences = MinimalFastaParser(candidate_source_f)

        # sequence list provided for candidate sequence -- write
        # the seqs to a temp file to pass to uclust. This is done in all
        # cases to convert the sequences to uppercase in case they're not
        # already. The bad handling of upper versus lower-cased sequences is
        # a uclust issue.
        candidate_fasta_filepath = \
         get_tmp_filename(prefix='pynast_candidate',suffix='.fasta')
        files_to_remove.append(candidate_fasta_filepath)
        candidate_fasta_f = open(candidate_fasta_filepath,'w')
        try:
            for seq_id, seq in candidate_sequences:
                candidate_fasta_f.write(
                 '>%s\n%s\n' % (seq_id,str(seq).upper()))
        finally:
            candidate_fasta_f.close()
            if candidate_source_f is not None:
                candidate_source_f.close()

        # degap the template alignment for the sequence searching step and
        # write it to file
        template_fasta_filepath = \
         get_tmp_filename(prefix='pynast_template',suffix='.fasta')
        files_to_remove.append(template_fasta_filepath)
        template_fasta_f = open(template_fasta_filepath,'w')
        try:
            if isinstance(template_alignment,str):
                # the template alignment was received as a filepath
                try:
                    template_alignment_f = open(template_alignment)
                except IOError:
                    raise IOError(
                     "Cannot open specified filepath: %s" % template_alignment)
                # template alignment provided as filepath -- process it
                # iteratively to handle potentially massive
                # template_alignments
                try:
                    template_alignment = {}
                    for seq_id,seq in MinimalFastaParser(template_alignment_f):
                        template_alignment[seq_id] = seq
                        seq = Sequence(seq=seq,moltype=DNA)
                        template_fasta_f.write(
                         '>%s\n%s\n' % (seq_id,seq.degap()))
                finally:
                    template_alignment_f.close()
            else:
                # the template alignment was received as an alignment object
                template_fasta_f.write(template_alignment.degap().toFasta())
        finally:
            template_fasta_f.close()

        # Set up logging. NastLogger object takes precedence over log
        # file path, if both are provided.
        if logger is None:
            if log_fp is not None:
                logger = NastLogger(log_fp)
            else:
                logger = NastLogger()

        # percent identity is passed to the search as a fraction
        min_pct /= 100.

        # get the alignment iterator
        pw_alignment_iterator = uclust_search_and_align_from_fasta_filepath(
                candidate_fasta_filepath,
                template_fasta_filepath,
                percent_ID=min_pct,
                enable_rev_strand_matching=True)

        try:
            current_result = pw_alignment_iterator.next()
        except StopIteration:
            current_result = None

        candidate_reader_f = open(candidate_fasta_filepath)
        for seq_id, seq in MinimalFastaParser(candidate_reader_f):
            if '-' in seq:
                # the finally clause below takes care of the clean-up
                raise ValueError(
                 "Candidate sequence contains gaps. This is not supported.")

            # The search results are walked in step with the candidate
            # sequences: the pending result belongs to this candidate only
            # if their identifiers agree.
            has_match = False
            if current_result:
                candidate_seq_id, template_seq_id, pw_aligned_candidate,\
                 pw_aligned_template, pct_identity = current_result
                has_match = \
                 seq_id.split()[0] == candidate_seq_id.split()[0]

            if not has_match:
                # a suitable match was not found - don't align the sequence
                # log the failure
                logger.record(
                    seq_id, # input sequence identifier
                    len(seq), # input sequence length
                    "No search results.")
                # yield the unaligned sequence and failure code
                yield DNA.makeSequence(seq,Name=seq_id), 1
                continue

            # this sequence was aligned
            if align_unaligned_seqs_f:
                # if an alternate pairwise aligner was specified, unalign
                # and re-align the sequences.
                pw_aligned_template, pw_aligned_candidate =\
                 align_two_seqs(pw_aligned_template.replace('-',''),
                                pw_aligned_candidate.replace('-',''),
                                align_unaligned_seqs_f)

            # Cast the pairwise alignments to DNA sequence objects
            pw_aligned_candidate = \
             DNA.makeSequence(pw_aligned_candidate,Name=candidate_seq_id)
            pw_aligned_template = \
             DNA.makeSequence(pw_aligned_template,Name=template_seq_id)

            # Remove any terminal gaps that were introduced into the template
            # sequence
            pw_aligned_candidate, pw_aligned_template = \
                remove_template_terminal_gaps(
                pw_aligned_candidate, pw_aligned_template)
            candidate_seq_id = pw_aligned_candidate.Name

            # get the aligned template sequence from the template alignment
            try:
                template_aligned_seq = \
                 template_alignment.getGappedSeq(template_seq_id)
            except AttributeError:
                # template alignment is the dict built from a filepath
                template_aligned_seq = \
                 Sequence(seq=template_alignment[template_seq_id],moltype=DNA)

            # reintroduce the gap spacing from the template alignment
            pw_aligned_template, pw_aligned_candidate, new_gaps =\
              reintroduce_template_spacing(template_aligned_seq,
              pw_aligned_template,pw_aligned_candidate)

            # delete any new gaps that were introduced during the
            # pairwise alignment step
            pw_aligned_template, pw_aligned_candidate = adjust_alignment(
             pw_aligned_template,pw_aligned_candidate,new_gaps)

            # reintroduce any terminal gaps that were present in the template
            result = introduce_terminal_gaps(
                template_aligned_seq,pw_aligned_template,pw_aligned_candidate)

            unaligned_length = len(result.degap())
            if unaligned_length < min_len:
                # alignment is too short - log this as a failure
                # NOTE(review): the logged text does not distinguish this
                # case from a failed search (status 1); confirm whether a
                # length-specific message is wanted before changing it, as
                # log consumers may depend on the current text.
                logger.record(
                    seq_id, # input sequence identifier
                    len(seq), # input sequence length
                    "No search results.")
                # yield the unaligned sequence and failure code
                yield DNA.makeSequence(seq,Name=seq_id), 2
            else:
                # log the alignment
                logger.record(
                    seq_id, # input sequence identifier
                    len(seq), # input sequence length
                    '', # Errors
                    template_seq_id, # best template match id
                    '%3.2f' % pct_identity, # pct id to template
                    unaligned_length, # post alignment sequence length
                    )

                # yield the aligned sequence and success code
                yield DNA.makeSequence(result,Name=candidate_seq_id), 0

            # the pending result has been consumed: get the next alignment
            try:
                current_result = pw_alignment_iterator.next()
            except StopIteration:
                # end of the input fasta file indicates completion,
                # not end of the aligned sequences. Clear the consumed
                # result so it can't be matched against a later candidate.
                current_result = None
    finally:
        if candidate_reader_f is not None:
            candidate_reader_f.close()
        if pw_alignment_iterator is not None:
            pw_alignment_iterator.close()
        # clean-up temporary files
        remove_files(files_to_remove,error_on_missing=False)
def null_status_callback_f(x):
    """No-op placeholder used as the default status_callback_f

        x: ignored. Always returns None.
    """
    return None
def pynast_seqs(candidate_sequences, template_alignment, max_hits=30,
    min_pct=75.0, min_len=1000, align_unaligned_seqs_f=None, log_fp=None,
    logger=None, status_callback_f=null_status_callback_f,**kwargs):
    """Run ipynast_seqs on candidate_sequences and collect the results

        Results are returned as a tuple of lists:
         (aligned_sequences, failed_to_align_sequences)
        where all sequences are DNA sequence objects.

        candidate_sequences
            an iterable object (e.g., a list) containing tuples of
            (seq_id, sequence) pairs (e.g., as returned by MinimalFastaParser)
            or a fasta filepath
        template_alignment
            a PyCogent alignment object containing the template alignment
            or a fasta filepath
        max_hits
            Maximum number of uclust hits to return
        min_pct
            minimum % identity for best database match
        min_len
            minimum length of match for alignment
        align_unaligned_seqs_f
            Function to align sequences. Must be of the form:
             align_unaligned_seqs(seqs, moltype, params=None)
             see cogent.app.muscle_v38.align_unaligned_seqs
        log_fp
            Optional path to log file
        logger
            Optional NastLogger object, takes precedence over log_fp
        status_callback_f:
            Callback function to provide status updates to callers of
            pynast_seqs. This function must take a single parameter; it is
            called with every sequence, aligned or not, as it is processed.
    """
    depreciation_warning(kwargs)

    # sequences with exit status 0 are collected in aligned, all others
    # in failed_to_align
    aligned = []
    failed_to_align = []

    results = ipynast_seqs(
     candidate_sequences, template_alignment,
     max_hits=max_hits, min_pct=min_pct, min_len=min_len,
     align_unaligned_seqs_f=align_unaligned_seqs_f, log_fp=log_fp,
     logger=logger)

    for seq, status in results:
        destination = aligned if status == 0 else failed_to_align
        destination.append(seq)
        status_callback_f(seq)

    return aligned, failed_to_align
# Lookup from the name of each supported pairwise alignment method to the
# function implementing it. 'uclust' maps to None because no re-alignment
# function is needed for it: ipynast_seqs keeps the pairwise alignment
# produced by the uclust search when no function is supplied.
pairwise_alignment_methods = dict([
 ('muscle',muscle_align_unaligned_seqs),
 ('mafft',mafft_align_unaligned_seqs),
 ('clustal',clustal_align_unaligned_seqs),
 ('blast',blast_align_unaligned_seqs),
 ('pair_hmm',pair_hmm_align_unaligned_seqs),
 ('uclust',None)])
PyNAST-1.2/scripts/ 0000775 0000000 0000000 00000000000 12047240761 0014124 5 ustar 00root root 0000000 0000000 PyNAST-1.2/scripts/pynast 0000775 0000000 0000000 00000015632 12047240761 0015377 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
from optparse import OptionParser
from os.path import exists, splitext
from cogent import LoadSeqs, DNA
from cogent.core.alignment import DenseAlignment
from cogent.parse.fasta import MinimalFastaParser
from pynast.util import ipynast_seqs, pairwise_alignment_methods,\
null_status_callback_f
from pynast.logger import NastLogger
__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2010, The PyNAST Project"
__credits__ = ["Greg Caporaso", "Kyle Bittinger"]
__license__ = "GPL"
__version__ = "1.2"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
__status__ = "Development"
pynast_usage_string = """usage: %prog [options] {-i input_fp -t template_fp}
[] indicates optional input (order unimportant)
{} indicates required input (order unimportant)
Example usage:
%prog -i my_input.fasta -t my_template.fasta
"""
def parse_command_line_parameters():
    """ Parses and validates the command line arguments

        Returns the (opts, args) pair produced by OptionParser.parse_args().

        Exits through parser.error() if positional arguments are passed, a
         required option is omitted, the pairwise alignment method is
         unknown, or the template or input filepath does not exist.
    """
    usage = pynast_usage_string
    # report the version declared by this script rather than a hard-coded
    # value that can go stale
    version = 'Version: %prog ' + __version__
    parser = OptionParser(usage=usage, version=version)

    parser.add_option('-t','--template_fp',action='store',
          type='string',help='path to template '+
          'alignment file [REQUIRED]')

    parser.add_option('-i','--input_fp',action='store',
          type='string',help='path to input '+
          'fasta file [REQUIRED]')

    parser.add_option('-v','--verbose',action='store_true',
        dest='verbose',default=False,
        help='Print status and other information '+
        'during execution [default: %default]')

    parser.add_option('-p','--min_pct_id',action='store',
          type='float',default=75.0,help='minimum percent sequence '+
          ' identity to consider a sequence a match [default: %default]')

    parser.add_option('-l','--min_len',action='store',
          type='int',default=1000,help='minimum sequence length '+
          'to include in NAST alignment [default: %default]')

    parser.add_option('-m','--pairwise_alignment_method',action='store',
          type='string',default='uclust',help='method '+
          'for performing pairwise alignment ' +
          '[default: %default]')

    parser.add_option('-a','--fasta_out_fp',action='store',
          type='string',dest='fasta_out_fp',help='path to store '+
          'resulting alignment file ' +
          '[default: derived from input filepath]')

    parser.add_option('-g','--log_fp',action='store',
          type='string',dest='log_fp',help='path to store '+
          'log file ' +
          '[default: derived from input filepath]')

    parser.add_option('-f','--failure_fp',action='store',
          type='string',dest='failure_fp',help='path to store '+
          'file of seqs which fail to align ' +
          '[default: derived from input filepath]')

    # options that are accepted but ignored
    parser.add_option('-e','--max_e_value',
          type='float',default=None,
          help='Depreciated. Will be removed in PyNAST 1.2')

    parser.add_option('-d','--blast_db',
          default=None,help='Depreciated. Will be removed in PyNAST 1.2')

    opts,args = parser.parse_args()

    # Compare against None (not truthiness) so that an explicit value such
    # as -e 0 still triggers the warning
    if opts.max_e_value is not None:
        print("Depreciation Warning: max_e_value no longer used "+
              "as database search now uses uclust. ")
    if opts.blast_db is not None:
        print("Depreciation Warning: blast_db no longer used "+
              "as database search now uses uclust. ")

    if args:
        parser.error('All parameters must be passed as options.'+
         ' Offending parameter(s):\n %s' % '\n '.join(args))

    required_options = ['input_fp','template_fp']
    for option in required_options:
        # option names match the attribute names on opts
        if getattr(opts,option) is None:
            parser.error('Required option --%s omitted.' % option)

    # Validate against the methods pynast actually provides (imported from
    # pynast.util) instead of a second, hand-maintained list of names
    if opts.pairwise_alignment_method not in pairwise_alignment_methods:
        parser.error(
         'Unknown pairwise alignment method. Available options are:\n %s' %
         ' '.join(sorted(pairwise_alignment_methods)))

    if not exists(opts.template_fp):
        parser.error(
         'Template filepath does not exist:\n %s\n Pass a valid one via -t.'%
         opts.template_fp)

    if not exists(opts.input_fp):
        parser.error(
         'Input filepath does not exist:\n %s\n Pass a valid one via -i.'%
         opts.input_fp)

    return opts,args
def main():
    """ Align the input sequences against the template and write the results

        Reads the options from the command line, loads the template
         alignment, runs ipynast_seqs on the input sequences and writes
         aligned sequences to the alignment file and all other sequences to
         the failure file. A log of the run is written by NastLogger.

        Raises KeyError if the template alignment cannot be loaded because
         of an unsupported character.
    """
    opts, args = parse_command_line_parameters()
    verbose = opts.verbose

    seqs_fp = opts.input_fp
    min_pct_id = opts.min_pct_id
    min_len = opts.min_len
    template_fp = opts.template_fp

    align_unaligned_seqs_f =\
     pairwise_alignment_methods[opts.pairwise_alignment_method]

    # If necessary, derive default locations for alignment, log, and
    # failure files by removing the file extension from the sequences
    # filepath, seqs_fp.
    seqs_fp_base = splitext(seqs_fp)[0]
    aln_fp = opts.fasta_out_fp or (seqs_fp_base + '_pynast_aligned.fasta')
    log_fp = opts.log_fp or (seqs_fp_base + '_pynast_log.txt')
    fail_fp = opts.failure_fp or (seqs_fp_base + '_pynast_fail.fasta')

    if verbose:
        print("Input file : %s" % seqs_fp)
        print("Template alignment : %s" % template_fp)
        print("Output alignment : %s" % aln_fp)
        print("Log file : %s" % log_fp)
        print("Failure file : %s" % fail_fp)

    logger = NastLogger(log_fp)

    # Load the template sequences, replacing '.' chars with '-' chars
    # and lowercase chars with uppercase chars
    template_f = open(template_fp)
    try:
        template_seqs = [(seq_id,seq.replace('.','-').upper())
                         for seq_id, seq in MinimalFastaParser(template_f)]
    finally:
        template_f.close()

    try:
        template_alignment = LoadSeqs(data=template_seqs,moltype=DNA,
         aligned=DenseAlignment)
    except KeyError as e:
        raise KeyError(
         'Only ACGT-. characters can be contained in template alignments.'+
         ' The offending character was: %s' % e)

    # All files are closed in the finally clause, so nothing is left open
    # (and buffered output is flushed) if the alignment fails part-way
    candidate_f = open(seqs_fp)
    aln_file = None
    fail_file = None
    try:
        candidate_sequences = MinimalFastaParser(candidate_f)
        pynast_iterator = ipynast_seqs(
         candidate_sequences,
         template_alignment,
         max_hits=30,
         min_pct=min_pct_id,
         min_len=min_len,
         align_unaligned_seqs_f=align_unaligned_seqs_f,
         logger=logger)

        aln_file = open(aln_fp,'w')
        fail_file = open(fail_fp,'w')

        completed_seq_count = 0
        for seq, status in pynast_iterator:
            # status 0 means the sequence was aligned; anything else is a
            # failure
            if status == 0:
                aln_file.write('>%s\n%s\n' % (seq.Name,str(seq)))
            else:
                fail_file.write('>%s\n%s\n' % (seq.Name,str(seq)))

            # Update completed sequence count, and print status message
            # when requested by user.
            completed_seq_count += 1
            if verbose and completed_seq_count % 100 == 0:
                print('%d sequences completed.' % completed_seq_count)
    finally:
        for open_file in (aln_file,fail_file,candidate_f):
            if open_file is not None:
                open_file.close()

if __name__ == "__main__":
    main()
PyNAST-1.2/setup.py 0000664 0000000 0000000 00000003065 12047240761 0014153 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
# File created on 04 Feb 2010
from __future__ import division
from distutils.core import setup
import re
__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2010, The PyNAST project"
__credits__ = ["Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.2"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
__status__ = "Development"
long_description = """The Python Nearest Alignment Space Termination tool
http://qiime.org/pynast
PyNAST: a flexible tool for aligning sequences to a template alignment.
J. Gregory Caporaso, Kyle Bittinger, Frederic D. Bushman, Todd Z. DeSantis, Gary L. Andersen, and Rob Knight.
January 15, 2010, DOI 10.1093/bioinformatics/btp636. Bioinformatics 26: 266-267.
"""
# PyCogent is a hard requirement: refuse to install without it
try:
    import cogent
except ImportError:
    print("PyCogent not installed but required. (Is it installed? Is it in the current users $PYTHONPATH or site-packages?) See http://pycogent.sourceforge.net.")
    # SystemExit is raised directly rather than through exit(), which is
    # only available when the site module has been loaded
    raise SystemExit(1)

# Reduce the version string to a tuple of its numeric fields so it can be
# compared against the minimum supported version (non-numeric pieces such
# as a '-dev' suffix are dropped)
pycogent_version = tuple([int(v)
        for v in re.split(r"[^\d]", cogent.__version__) if v.isdigit()])

if pycogent_version < (1,5,3):
    print("PyCogent >= 1.5.3 required, but %s is installed." % cogent.__version__)
    raise SystemExit(1)

setup(name='PyNAST',
      version=__version__,
      description='The Python Nearest Alignment Space Termination tool',
      author=__maintainer__,
      author_email=__email__,
      maintainer=__maintainer__,
      maintainer_email=__email__,
      url='http://qiime.org/pynast',
      packages=['pynast'],
      scripts=['scripts/pynast'],
      long_description=long_description
)
PyNAST-1.2/tests/ 0000775 0000000 0000000 00000000000 12047240761 0013577 5 ustar 00root root 0000000 0000000 PyNAST-1.2/tests/all_tests.py 0000775 0000000 0000000 00000007056 12047240761 0016156 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
"""Run all tests.
"""
from os import walk, environ
from subprocess import Popen, PIPE, STDOUT
from os.path import join, abspath, dirname, split
from glob import glob
import re
__author__ = "Rob Knight"
__copyright__ = "Copyright 2010, The PyNAST Project"
__credits__ = ["Rob Knight","Greg Caporaso", "Jai Ram Rideout"]
__license__ = "GPL"
__version__ = "1.2"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
__status__ = "Development"
def main():
    """Run all unit tests and script checks, and report the failures

        Every tests/test_*.py file is run with -v, and every file in the
         scripts directory is run with -h. Returns 0 if everything passed
         and 1 otherwise, for use as the process exit code.
    """
    pynast_dir = abspath(join(dirname(__file__),'..'))
    test_dir = join(pynast_dir,'tests')
    scripts_dir = join(pynast_dir,'scripts')

    unittest_good_pattern = re.compile(r'OK\s*$')
    application_not_found_pattern = re.compile('ApplicationNotFoundError')
    python_name = 'python'

    def run_python(arguments):
        # Run python on arguments and return the combined stdout/stderr.
        # The command is passed as a list (no shell), so paths containing
        # spaces or shell metacharacters are handled; communicate() reads
        # all of the output and waits for the process to finish.
        process = Popen([python_name] + arguments,universal_newlines=True,
                        stdout=PIPE,stderr=STDOUT)
        return process.communicate()[0]

    bad_tests = []
    missing_application_tests = []

    # Run through all of PyNAST's unit tests, and keep track of any files which
    # fail unit tests.
    unittest_names = []
    for root, dirs, files in walk(test_dir):
        for name in files:
            if name.startswith('test_') and name.endswith('.py'):
                unittest_names.append(join(root,name))
    unittest_names.sort()

    for unittest_name in unittest_names:
        print("Testing %s:\n" % unittest_name)
        result = run_python([unittest_name,'-v'])
        print(result)
        if not unittest_good_pattern.search(result):
            # separate failures caused by a missing external application
            # from genuine test failures
            if application_not_found_pattern.search(result):
                missing_application_tests.append(unittest_name)
            else:
                bad_tests.append(unittest_name)

    # Run through all of PyNAST's scripts, and pass -h to each one. If the
    # resulting stdout does not begin with the Usage text, that is an
    # indicator of something being wrong with the script. Issues that would
    # cause that are bad import statements in the script, SyntaxErrors, or
    # other failures prior to running parse_command_line_parameters.
    script_names = glob('%s/*' % scripts_dir)
    script_names.sort()
    bad_scripts = []
    for script_name in script_names:
        # escape the file name so that characters such as '.' or '+' are
        # matched literally
        script_good_pattern = re.compile(
         '^Usage: %s' % re.escape(split(script_name)[1]))
        print("Testing %s." % script_name)
        result = run_python([script_name,'-h'])
        if not script_good_pattern.search(result):
            bad_scripts.append(script_name)

    if bad_tests:
        print("\nFailed the following unit tests.\n%s" % '\n'.join(bad_tests))

    if missing_application_tests:
        print("\nFailed the following unit tests, in part or whole due "+
         "to missing external applications.\nDepending on the QIIME features "+
         "you plan to use, this may not be critical.\n%s"
         % '\n'.join(missing_application_tests))

    if bad_scripts:
        print("\nFailed the following script tests.\n%s" % '\n'.join(bad_scripts))

    # If any of the unit tests or script tests failed, or if we have any
    # missing application errors, use return code 1 (as python's
    # unittest module does) to indicate one or more failures with the test
    # suite.
    return_code = 1
    if not (bad_tests or missing_application_tests or bad_scripts):
        print("\nAll tests passed successfully.")
        return_code = 0
    return return_code

if __name__ == "__main__":
    raise SystemExit(main())
PyNAST-1.2/tests/test_logger.py 0000775 0000000 0000000 00000004152 12047240761 0016474 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
from __future__ import division
from os import remove
from cogent import LoadSeqs, DNA
from cogent.util.unit_test import TestCase, main
from cogent.app.util import get_tmp_filename
from cogent.parse.fasta import MinimalFastaParser
from pynast.logger import NastLogger
__author__ = "Kyle Bittinger"
__copyright__ = "Copyright 2010, The PyNAST Project"
__credits__ = ["Greg Caporaso", "Kyle Bittinger"]
__license__ = "GPL"
__version__ = "1.2"
__maintainer__ = "Kyle Bittinger"
__email__ = "kylebittinger@gmail.com"
__status__ = "Development"
class NastLoggerTests(TestCase):
    """Tests of the PyNAST logging class"""

    def setUp(self):
        """Reserve a temporary path for the log file used by each test"""
        self.filename = get_tmp_filename(prefix='NastLoggerTest',
                                         suffix='.log')

    def tearDown(self):
        """Delete the log file, if the test created it"""
        try:
            remove(self.filename)
        except OSError:
            # the test never wrote the file
            pass

    def test_init(self):
        """NastLogger.__init__ should store log filename in Filename attribute"""
        logger_without_file = NastLogger()
        self.assertEqual(logger_without_file.Filename, None)

        logger_with_file = NastLogger(self.filename)
        self.assertEqual(logger_with_file.Filename, self.filename)

    def test_header(self):
        """NastLogger.__init__ should write correct header to log file"""
        # constructing the logger is what writes the header
        nast_logger = NastLogger(self.filename)

        log_file = open(self.filename, 'r')
        obs_header = log_file.readline()
        log_file.close()

        exp_header = (
            'candidate sequence ID\tcandidate nucleotide count\terrors\t'
            'template ID\tBLAST percent identity to template\t'
            'candidate nucleotide count post-NAST\n'
            )
        self.assertEqual(obs_header, exp_header)

    def test_record(self):
        """NastLogger.record should write tab-separated values to log file"""
        nast_logger = NastLogger(self.filename)
        nast_logger.record('hello', 'world')

        log_file = open(self.filename, 'r')
        # the first line is the header; the recorded values follow it
        obs_header = log_file.readline()
        obs_message = log_file.readline()
        log_file.close()

        self.assertEqual(obs_message, 'hello\tworld\n')

if __name__ == "__main__":
    main()
PyNAST-1.2/tests/test_util.py 0000775 0000000 0000000 00000557742 12047240761 0016214 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
from __future__ import division
import sys
from cogent import LoadSeqs, DNA
from cogent.util.misc import remove_files
from cogent.core.alignment import DenseAlignment
from cogent.app.util import get_tmp_filename
from cogent.app.muscle_v38 import align_unaligned_seqs as muscle_align_unaligned_seqs
from cogent.app.mafft import align_unaligned_seqs as mafft_align_unaligned_seqs
from cogent.app.clustalw import align_unaligned_seqs as clustal_align_unaligned_seqs
from cogent.parse.fasta import MinimalFastaParser
from cogent.util.unit_test import TestCase, main
from pynast.util import (align_two_seqs, reintroduce_template_spacing,
adjust_alignment, nearest_gap, pynast_seq,
introduce_terminal_gaps, UnalignableSequenceError, pynast_seqs,
pair_hmm_align_unaligned_seqs, blast_align_unaligned_seqs, ipynast_seqs,
remove_template_terminal_gaps)
from pynast.logger import NastLogger
__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2010, The PyNAST Project"
__credits__ = ["Greg Caporaso", "Kyle Bittinger", "Jai Ram Rideout"]
__license__ = "GPL"
__version__ = "1.2"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
__status__ = "Development"
class PyNastTests(TestCase):
""" Tests of the PyNAST functionality
"""
def setUp(self):
    """Build the sequence collections and temporary files shared by the tests

        Every temporary file created here is registered in
         self.files_to_remove so that tearDown can delete it.
    """
    self.files_to_remove = []

    # Test set 1 input sequences: as a sequence collection ...
    self.full_length_test1_input_seqs =\
     LoadSeqs(data=input_seqs1_fasta,moltype=DNA,aligned=False)
    # ... written to a fasta file ...
    self.full_length_test1_input_seqs_fp = \
     get_tmp_filename(prefix='PyNastTest', suffix='.fasta')
    self.files_to_remove.append(self.full_length_test1_input_seqs_fp)
    input_seqs_f = open(self.full_length_test1_input_seqs_fp,'w')
    input_seqs_f.write(input_seqs1_fasta)
    input_seqs_f.close()
    # ... and as a list of fasta lines
    self.full_length_test1_input_seqs_lines = input_seqs1_fasta.split('\n')

    # Test set 1 template alignment: as an alignment object ...
    self.full_length_test1_template_aln = \
     LoadSeqs(data=pynast_test_template_fasta1,moltype=DNA,
              aligned=DenseAlignment)
    # ... and written to a fasta file (path requested as a plain str)
    self.full_length_test1_template_aln_fp = \
     get_tmp_filename(prefix='PyNastTest', suffix='.fasta',
                      result_constructor=str)
    self.files_to_remove.append(self.full_length_test1_template_aln_fp)
    template_aln_f = open(self.full_length_test1_template_aln_fp,'w')
    template_aln_f.write(self.full_length_test1_template_aln.toFasta())
    template_aln_f.close()

    # Test set 1 expected results
    self.full_length_test1_expected_aln = \
     LoadSeqs(data=input_seqs1_aligned_fasta,moltype=DNA,
              aligned=DenseAlignment)
    self.full_length_test1_expected_fail = \
     LoadSeqs(data=input_seqs1_fail_fasta,moltype=DNA,aligned=False)

    # Test set 2 input sequences and template alignment
    self.full_length_test2_input_seqs =\
     LoadSeqs(data=input_seqs2_fasta,moltype=DNA,aligned=False)
    self.full_length_test2_template_aln = \
     LoadSeqs(data=pynast_test_template_fasta2,moltype=DNA,
              aligned=DenseAlignment)

    self.input_seqs_gaps = input_seqs_gaps.split('\n')

    self.log_filename = \
     get_tmp_filename(prefix='PyNastTest', suffix='.log')
    self.files_to_remove.append(self.log_filename)
    # touch the log file, so we don't get an error trying to remove it
    # if a test doesn't create it
    open(self.log_filename,'w').close()
def tearDown(self):
    """ Delete the temporary files registered in self.files_to_remove
    """
    temporary_files = self.files_to_remove
    remove_files(temporary_files)
def test_pynast_logging(self):
    """pynast_seqs() should write log file with correct contents
    """
    candidate_seqs = [('1','ACGTACGTTAATACCCTGGTAGT'),
                      ('2','AA')]
    nast_logger = NastLogger(self.log_filename)

    # only the log file written as a side effect is of interest here, so
    # the return value is discarded
    pynast_seqs(candidate_seqs, db_aln2, min_len=5, logger=nast_logger)

    log_file = open(self.log_filename, 'r')
    # skip the header line; everything after it is compared
    log_file.readline()
    obs_contents = log_file.read()
    log_file.close()

    self.assertEqual(obs_contents, expected_logfile_contents)
def test_pynast_logging_for_stringent_user_requirements(self):
    """pynast_seqs() should record info if best hit does not meet min requirements
    """
    candidate_seqs = [('1','ACGTACGTTAATACCCTGGTAGT')]
    nast_logger = NastLogger(self.log_filename)

    # min_len is far longer than the candidate sequence; only the log file
    # written as a side effect is of interest, so the return value is
    # discarded
    pynast_seqs(candidate_seqs, db_aln2, min_len=500, logger=nast_logger)

    log_file = open(self.log_filename, 'r')
    # skip the header line; everything after it is compared
    log_file.readline()
    obs_contents = log_file.read()
    log_file.close()

    self.assertEqual(obs_contents, expected_stringent_logfile_contents)
def test_pynast_seqs_fail(self):
    """ pynast_seqs: returns expected fail list for sample data
    """
    candidate_seqs = \
     MinimalFastaParser(self.full_length_test1_input_seqs_lines)
    aligned, failed = pynast_seqs(
     candidate_seqs,
     self.full_length_test1_template_aln,
     min_len=1000,min_pct=75.0)

    # build the expected object - a list of sequence objects which
    # failed to align
    failed_seq_id = 'FAKE1 here is some desc.73602 tag1;tag2, tag3:tag4'
    failed_seq = self.full_length_test1_expected_fail.getSeq(failed_seq_id)
    expected_failed = [DNA.makeSequence(failed_seq,Name=failed_seq_id)]

    self.assertEqual(failed,expected_failed)
def test_pynast_seqs_exact_matches(self):
    """ pynast_seqs: perfectly aligns several exact template matches
    """
    template_aln = self.full_length_test1_template_aln

    # Build the expected result object, which is a list of
    # dna sequence objects where names include the aligned span
    expected_seqs = [
     DNA.makeSequence(
      str(template_aln.getGappedSeq(name)),
      Name='%s 1..%d' % (name,len(template_aln.getSeq(name).degap())))
     for name in template_aln.Names]
    expected_aln = LoadSeqs(data=expected_seqs,
     moltype=DNA,aligned=DenseAlignment)

    # the candidates are the template sequences themselves, degapped
    input_seqs = self.full_length_test1_template_aln.degap()

    # run pynast_seqs on the input sequences
    actual = pynast_seqs(input_seqs.todict().items(),
     template_aln,
     min_len=1000,min_pct=75.0,
     align_unaligned_seqs_f=None)
    aligned_seqs = actual[0]
    failed_seqs = actual[1]

    # Load the result into an alignment object
    actual_aln = LoadSeqs(data=aligned_seqs,moltype=DNA,
     aligned=DenseAlignment)

    # alignment length is correct
    self.assertEqual(len(actual_aln),len(template_aln))

    # correct number of sequences were aligned
    self.assertEqual(actual_aln.getNumSeqs(),expected_aln.getNumSeqs())

    # same collection of seq ids is returned
    actual_names = actual_aln.Names
    actual_names.sort()
    expected_names = expected_aln.Names
    expected_names.sort()
    self.assertEqual(actual_names,expected_names)

    # all sequence lengths match expected sequence lengths (ie, no
    # missing bases)
    for seq_id in actual_aln.Names:
        actual_seq_len = len(actual_aln.getSeq(seq_id))
        expected_seq_len = len(expected_aln.getSeq(seq_id))
        self.assertEqual(actual_seq_len,expected_seq_len)

    # resulting list of dna sequence objects is as expected
    # (this would take care of some of the above tests, but testing
    # aspects individually makes it easier to diagnose failures)
    aligned_seqs.sort()
    expected_seqs.sort()
    self.assertEqual(aligned_seqs,expected_seqs)

    # fail list is empty
    self.assertEqual(failed_seqs,[])
def test_pynast_seqs_aligned_full_length(self):
    """ pynast_seqs: pynast results at least 95% identical to NAST results

    A note on this test: In the initial versions of PyNAST, I
    wanted the alignments to be exactly like those resulting from
    NAST (e.g., in PyNAST 1.0). I've since abandoned that, in favor
    of getting improved alignments. This test was modified after
    PyNAST 1.0, and I'm now only testing that the alignments
    are similar to those derived from NAST. This test may be
    of little use, but it is a nice test of the code on
    full-length sequences, so I hesitate to delete it.
    -Greg (24 Mar 2010)
    """
    template_aln = self.full_length_test1_template_aln
    expected_aln = self.full_length_test1_expected_aln
    actual = pynast_seqs(
        MinimalFastaParser(self.full_length_test1_input_seqs_lines),
        template_aln,
        align_unaligned_seqs_f=None)

    # Build the expected result: a list of DNA sequence objects taken
    # from the expected (NAST) alignment.
    expected_seqs = [
        DNA.makeSequence(str(expected_aln.getGappedSeq(n)), Name=n)
        for n in expected_aln.Names]

    # The loaded alignment itself is never inspected.
    # NOTE(review): presumably kept as a sanity check that the aligned
    # results can be loaded as a DenseAlignment - confirm.
    LoadSeqs(data=actual[0], moltype=DNA, aligned=DenseAlignment)

    # Only look at the unique id portion of the sequence description,
    # as NAST and PyNAST now handle terminal bases differently. NAST
    # does local alignments, so sometimes loses terminal bases. PyNAST
    # does global alignments, so the candidates only lose terminal bases
    # if they introduce terminal gaps in the template alignments.
    a_list = sorted((a.Name.split()[0], a) for a in actual[0])
    e_list = sorted((e.Name.split()[0], e) for e in expected_seqs)

    for (a_id, a_seq), (e_id, e_seq) in zip(a_list, e_list):
        # first component of names are equal
        self.assertEqual(a_id, e_id)
        # Position-by-position comparison over the length of the PyNAST
        # result (index-based, as in the original test).
        count_same = sum(
            1 for i in range(len(a_seq)) if a_seq[i] == e_seq[i])
        # float() forces true division; with Python 2 integer division
        # the ratio would truncate to 0 or 1.
        fraction_same = float(count_same) / len(a_seq)
        # The whole message is one format string so that '%%' is
        # rendered as a single '%'.
        self.assertTrue(
            fraction_same >= 0.95,
            "PyNAST and NAST alignments of %s are "
            "less than 95%% identical" % a_id)
def test_pynast_seqs_error_on_gap(self):
    """pynast_seqs: a ValueError is raised for the gapped candidate input."""
    gapped_candidates = MinimalFastaParser(self.input_seqs_gaps)
    self.assertRaises(
        ValueError,
        pynast_seqs,
        gapped_candidates,
        self.full_length_test1_template_aln,
        min_len=1000,
        min_pct=75.0)
def test_pynast_seqs_simple(self):
    """pynast_seqs: simple candidates are split into aligned and failed lists."""
    full_length = 'ACGTACGTTAATACCCTGGTAGT'
    aligned = 'ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---'
    candidate_seqs = [
        ('1', full_length),
        ('2', full_length),
        ('3', 'AA'),
    ]

    # With min_len=5 the two 23-base candidates align and 'AA' fails.
    expected = (
        [DNA.makeSequence(aligned, Name='1'),
         DNA.makeSequence(aligned, Name='2')],
        [DNA.makeSequence('AA', Name='3')],
    )
    observed = pynast_seqs(candidate_seqs, db_aln2, min_len=5, min_pct=75.0)
    self.assertEqual(observed, expected)

    # all fail when min_len restricts matches
    expected = (
        [],
        [DNA.makeSequence(full_length, Name='1'),
         DNA.makeSequence(full_length, Name='2'),
         DNA.makeSequence('AA', Name='3')],
    )
    observed = pynast_seqs(
        candidate_seqs, db_aln2, min_len=5000, min_pct=75.0)
    self.assertEqual(observed, expected)
def test_pynast_seqs_simple_alt_pairwise(self):
    """pynast_seqs: an alternative pairwise aligner is honoured."""

    def make_template():
        # A fresh single-sequence template alignment for each scenario.
        return LoadSeqs(
            data={'2': 'ACCC-----CCTTTT'},
            moltype=DNA, aligned=DenseAlignment)

    # First check that the order of the returned sequences is correct,
    # as this is easy to get wrong.
    result = pynast_seqs(
        [('1', 'AGCCCCTTTT')],
        make_template(),
        min_len=5, min_pct=75.0,
        align_unaligned_seqs_f=pair_hmm_align_unaligned_seqs)
    self.assertEqual(
        result,
        ([DNA.makeSequence('AGCC-----CCTTTT', Name='1')], []))

    # Then check that the supplied aligner was actually applied. Different
    # real aligners rarely disagree on sequences this short, so a fake
    # aligner that alters the candidate sequence is used instead.
    def fake_aligner(seqs, moltype, params={}):
        return LoadSeqs(
            data=[('candidate', 'AGGGGGTTTT'),
                  ('template', 'ACCCCCTTTT')],
            moltype=DNA)

    result = pynast_seqs(
        [('1', 'ACCCCCTTTT')],
        make_template(),
        min_len=5, min_pct=75.0,
        align_unaligned_seqs_f=fake_aligner)
    self.assertEqual(
        result,
        ([DNA.makeSequence('AGGG-----GGTTTT', Name='1')], []))
def test_ipynast_seqs_simple(self):
    """ipynast_seqs: yields (sequence, status) pairs for simple test data."""
    seq1 = 'ACGAACGTTAATACCCTGGAAGT'
    seq2 = 'ACGTACGTTAATACCCTGGTAGT'
    candidate_seqs = [('1', seq1), ('2', seq2), ('3', 'AA')]

    # min_len=5: the first two candidates come back aligned with status 0.
    expected = [
        (DNA.makeSequence(
            'ACGAACGT-TA--ATA-C-----CC-T-G-GAA-G-T---', Name='1'), 0),
        (DNA.makeSequence(
            'ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---', Name='2'), 0),
        (DNA.makeSequence('AA', Name='3'), 1),
    ]
    observed = list(ipynast_seqs(
        candidate_seqs, db_aln2, min_len=5, min_pct=75.0))
    self.assertEqual(observed, expected)

    # all fail when min_len restricts matches
    expected = [
        (DNA.makeSequence(seq1, Name='1'), 2),
        (DNA.makeSequence(seq2, Name='2'), 2),
        (DNA.makeSequence('AA', Name='3'), 1),
    ]
    observed = list(ipynast_seqs(
        candidate_seqs, db_aln2, min_len=5000, min_pct=75.0))
    self.assertEqual(observed, expected)
def test_ipynast_seqs_simple_value_error(self):
    """ipynast_seqs: consuming the iterator raises ValueError on a gapped candidate."""
    # Candidate '2' contains a gap character.
    candidate_seqs = [
        ('1', 'ACGTACGTTAATACCCTGGAAGT'),
        ('2', 'ACGTACGTTAATACCCTGGT-AGT'),
        ('3', 'AA'),
    ]
    results_iter = ipynast_seqs(
        candidate_seqs, db_aln2, min_len=5, min_pct=75.0)
    # The error is expected when the iterator is exhausted via list().
    self.assertRaises(ValueError, list, results_iter)
def test_ipynast_seqs_real_data(self):
    """ipynast_seqs: result counts are sane for the real-data fixtures."""
    results = list(ipynast_seqs(
        self.full_length_test2_input_seqs.items(),
        self.full_length_test2_template_aln,
        min_len=5, min_pct=75.0))
    # correct number of results returned
    self.assertEqual(len(results), 1)

    results = list(ipynast_seqs(
        self.full_length_test1_input_seqs.items(),
        self.full_length_test1_template_aln,
        min_len=5, min_pct=75.0))
    # correct number of results returned
    self.assertEqual(len(results), 6)
    # The second element of each result is its status; at least one
    # result must carry status 0.
    statuses = [result[1] for result in results]
    self.assertTrue(
        0 in statuses,
        "At least one result succeeds in being aligned.")
def test_ipynast_seqs_handle_filepath_input(self):
    """ipynast_seqs: the template alignment may be passed as a filepath."""
    results = list(ipynast_seqs(
        self.full_length_test1_input_seqs.items(),
        self.full_length_test1_template_aln_fp,
        min_len=5, min_pct=75.0))
    # correct number of results returned
    self.assertEqual(len(results), 6)
    # The second element of each result is its status; at least one
    # result must carry status 0.
    statuses = [result[1] for result in results]
    self.assertTrue(
        0 in statuses,
        "At least one result succeeds in being aligned.")
def test_pynast_seqs_simple_status_callback(self):
    """pynast_seqs: status callback functions as expected.

    The callback must be invoked once per candidate sequence (three
    times here). The aligned/failed results are also checked against
    the values expected for these inputs.
    """
    candidate_seqs = [
        ('1', 'ACGTACGTTAATACCCTGGTAGT'),
        ('2', 'ACGTACGTTAATACCCTGGTAGT'),
        ('3', 'AA')]
    expected_aln = [
        DNA.makeSequence(
            'ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---', Name='1'),
        DNA.makeSequence(
            'ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---', Name='2')]
    expected_fail = [DNA.makeSequence('AA', Name='3')]

    class StatusTracker(object):
        """Counts how many times the status callback is invoked."""
        completed_seqs_count = 0

        def update_completed_seqs_count(self, x):
            # The callback argument is ignored; only calls are counted.
            self.completed_seqs_count += 1

    st = StatusTracker()
    self.assertEqual(st.completed_seqs_count, 0)
    results = pynast_seqs(
        candidate_seqs, db_aln2, min_len=5, min_pct=75.0,
        status_callback_f=st.update_completed_seqs_count)
    self.assertEqual(st.completed_seqs_count, 3)
    # The expected values were previously built but never compared.
    self.assertEqual(results, (expected_aln, expected_fail))
def test_pynast_seq_simple(self):
    """pynast_seq: a simple candidate yields the expected hit and alignment."""
    aligned_str = 'ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---'
    aligned_name = 'input 1..23'
    candidate = DNA.makeSequence('ACGTACGTTAATACCCTGGTAGT', Name='input')

    result = pynast_seq(
        candidate, db_aln2,
        max_hits=30, min_pct=75.0,
        min_len=5, align_unaligned_seqs_f=None)

    # check individual components of result object
    self.assertEqual(result[0], '5')
    self.assertEqual(str(result[1]), aligned_str)
    self.assertEqual(result[1].Name, aligned_name)

    # check full result object
    self.assertEqual(
        result,
        ('5', DNA.makeSequence(aligned_str, Name=aligned_name)))
def test_pynast_seq_simple_rc(self):
    """pynast_seq: a reverse-complemented candidate yields the same alignment."""
    # The candidate is the reverse complement of the sequence used in
    # test_pynast_seq_simple. The same template hit and aligned sequence
    # are expected; only the name differs, carrying an 'RC:' marker.
    aligned_str = 'ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---'
    aligned_name = 'input RC:1..23'
    candidate = DNA.makeSequence('ACTACCAGGGTATTAACGTACGT', Name='input')

    result = pynast_seq(
        candidate, db_aln2,
        max_hits=30, min_pct=75.0,
        min_len=5, align_unaligned_seqs_f=None)

    # check individual components of result object
    self.assertEqual(result[0], '5')
    self.assertEqual(str(result[1]), aligned_str)
    self.assertEqual(result[1].Name, aligned_name)

    # check full result object
    self.assertEqual(
        result,
        ('5', DNA.makeSequence(aligned_str, Name=aligned_name)))
def test_pynast_seq_10116(self):
    """pynast_seq: real seq that introduces 5' gaps in pw aligned template

    Pairwise-aligning this sequence against the template alignment puts
    five prime gaps into the pairwise aligned template. Early versions of
    PyNAST reintroduced too many terminal gaps in that situation, so the
    sequence is kept as a real-data regression case, essentially for the
    introduce_terminal_gaps functionality.
    """
    loaded = LoadSeqs(data=input_seq_10116.split('\n'), moltype=DNA)
    candidate = loaded.getSeq('10116')
    template_aln = self.full_length_test1_template_aln

    result = pynast_seq(
        candidate, template_aln,
        max_hits=30, min_pct=70.0, min_len=150,
        align_unaligned_seqs_f=None)

    # The aligned sequence must be exactly as long as the template alignment.
    self.assertEqual(len(result[1]), len(template_aln))
def test_pynast_seq_14990(self):
    """pynast_seq: handles an input seq longer than the best template seq."""
    hit_id = '14990_5_and_3_prime_lost_four_bases_each'
    template_aln = LoadSeqs(
        data=template_14990_trimmed.split('\n'),
        moltype=DNA, aligned=DenseAlignment)
    candidate = LoadSeqs(
        data=input_seq_14990.split('\n'), moltype=DNA).getSeq('14990')
    expected = (hit_id, template_aln.getGappedSeq(hit_id))

    result = pynast_seq(
        candidate, template_aln,
        max_hits=30, min_pct=75.0, min_len=1000,
        align_unaligned_seqs_f=None)

    # Work with plain strings for the comparisons below.
    actual_seq_id, actual_seq = [str(part) for part in result]
    expected_seq_id, expected_seq = [str(part) for part in expected]
    actual_ungapped = actual_seq.replace('-', '')
    expected_ungapped = expected_seq.replace('-', '')

    # correct seq id identified
    self.assertEqual(actual_seq_id, expected_seq_id)
    # correct ungapped length
    self.assertEqual(len(actual_ungapped), len(expected_ungapped))
    # correct gapped length
    self.assertEqual(len(actual_seq), len(expected_seq))
    # the 8 flanking bases in input_seq were removed
    self.assertEqual(len(actual_ungapped), len(candidate) - 8)
    # aligned seqs are equal
    self.assertEqual(actual_seq, expected_seq)
def test_pynast_seq_error_on_gap(self):
    """pynast_seq: ValueError for gapped candidates, none once gaps are removed."""
    for seq_id, raw_seq in MinimalFastaParser(self.input_seqs_gaps):
        # error when gap(s) in seq
        gapped = DNA.makeSequence(raw_seq, Name=seq_id)
        self.assertRaises(
            ValueError, pynast_seq, gapped, db_aln2,
            max_hits=1, min_pct=75.0, min_len=5,
            align_unaligned_seqs_f=None)

        # no error when no gaps in seq: strip '-' and '.' and call again;
        # the return value is not inspected.
        cleaned = raw_seq.replace('-', '').replace('.', '')
        ungapped = DNA.makeSequence(cleaned, Name=seq_id)
        pynast_seq(
            ungapped, db_aln2,
            max_hits=1, min_pct=70.0, min_len=5,
            align_unaligned_seqs_f=None)
def test_align_two_seqs_with_muscle(self):
    """align_two_seqs: simple pairwise cases using the muscle aligner.

    Only a few trivial cases are covered; this is not the place to test
    how the aligner itself behaves.
    """
    f = muscle_align_unaligned_seqs
    full = 'ACGTACGTACATACCCTGGTAGT'
    # Each case is (s1, s2, expected s1, expected s2). None for the
    # expected values means the inputs are expected back unchanged.
    cases = [
        # perfect alignment
        (full, full, None, None),
        # gap added to s2
        (full, 'ACGTACGTACATCCCTGGTAGT',
         full, 'ACGTACGTACAT-CCCTGGTAGT'),
        # gap added to s1
        ('ACGTACGTACATCCCTGGTAGT', full,
         'ACGTACGTACAT-CCCTGGTAGT', full),
        # single mismatch
        ('ACGTACGTACATTCCCTGGTAGT', full, None, None),
        # truncated sequence (3')
        (full, 'ACGTACGTACATACCCT',
         full, 'ACGTACGTACATACCCT------'),
        # truncated sequence (5')
        (full, 'CGTACATACCCTGGTAGT',
         full, '-----CGTACATACCCTGGTAGT'),
        # truncated sequence (5' and 3')
        (full, 'CGTACATACCCTGGT',
         full, '-----CGTACATACCCTGGT---'),
    ]
    for s1_str, s2_str, exp1_str, exp2_str in cases:
        s1 = DNA.makeSequence(s1_str)
        s2 = DNA.makeSequence(s2_str)
        if exp1_str is None:
            expected = (s1, s2)
        else:
            expected = (DNA.makeSequence(exp1_str),
                        DNA.makeSequence(exp2_str))
        self.assertEqual(align_two_seqs(s1, s2, f), expected)
def test_align_two_seqs_with_pair_hmm(self):
    """align_two_seqs: simple pairwise cases using the pair_hmm aligner.

    Only a few trivial cases are covered; this is not the place to test
    how the aligner itself behaves.
    """
    f = pair_hmm_align_unaligned_seqs
    full = 'ACGTACGTACATACCCTGGTAGT'
    # Each case is (s1, s2, expected s1, expected s2). None for the
    # expected values means the inputs are expected back unchanged.
    cases = [
        # perfect alignment
        (full, full, None, None),
        # gap added to s2
        (full, 'ACGTACGTACATCCCTGGTAGT',
         full, 'ACGTACGTACAT-CCCTGGTAGT'),
        # gap added to s1
        ('ACGTACGTACATCCCTGGTAGT', full,
         'ACGTACGTACAT-CCCTGGTAGT', full),
        # single mismatch
        ('ACGTACGTACATTCCCTGGTAGT', full, None, None),
        # truncated sequence (3')
        (full, 'ACGTACGTACATACCCT',
         full, 'ACGTACGTACATACCCT------'),
        # truncated sequence (5')
        (full, 'CGTACATACCCTGGTAGT',
         full, '-----CGTACATACCCTGGTAGT'),
        # truncated sequence (5' and 3')
        (full, 'CGTACATACCCTGGT',
         full, '-----CGTACATACCCTGGT---'),
    ]
    for s1_str, s2_str, exp1_str, exp2_str in cases:
        s1 = DNA.makeSequence(s1_str)
        s2 = DNA.makeSequence(s2_str)
        if exp1_str is None:
            expected = (s1, s2)
        else:
            expected = (DNA.makeSequence(exp1_str),
                        DNA.makeSequence(exp2_str))
        self.assertEqual(align_two_seqs(s1, s2, f), expected)
def test_align_two_seqs_with_blast(self):
    """align_two_seqs: simple pairwise cases using blast (bl2seq).

    Only a few trivial cases are covered; this is not the place to test
    how the aligner itself behaves.
    """
    f = blast_align_unaligned_seqs
    full = 'ACGTACGTACATACCCTGGTAGT'
    # Each case is (s1, s2, expected s1, expected s2, also_reversed).
    # None for the expected values means the inputs are expected back
    # unchanged. When also_reversed is true the call is repeated with the
    # arguments swapped and the swapped expectation.
    cases = [
        # perfect alignment
        (full, full, None, None, False),
        # gap added to s2
        (full, 'ACGTACGTACATCCCTGGTAGT',
         full, 'ACGTACGTACAT-CCCTGGTAGT', False),
        # gap added to s1
        ('ACGTACGTACATCCCTGGTAGT', full,
         'ACGTACGTACAT-CCCTGGTAGT', full, False),
        # single mismatch
        ('ACGTACGTACATTCCCTGGTAGT', full, None, None, False),
        # truncated sequence (3'); reversed is an extended sequence 3'
        (full, 'ACGTACGTACATACCCT',
         full, 'ACGTACGTACATACCCT------', True),
        # truncated sequence (5'); reversed is an extended sequence 5'
        (full, 'CGTACATACCCTGGTAGT',
         full, '-----CGTACATACCCTGGTAGT', True),
        # truncated sequence (5' and 3'); reversed is extended 5' and 3'
        (full, 'CGTACATACCCTGGT',
         full, '-----CGTACATACCCTGGT---', True),
        # staggered ends
        ('ACGTACGTACATACCCTGGT', 'CGTACATACCCTGGTAGTTT',
         'ACGTACGTACATACCCTGGT-----', '-----CGTACATACCCTGGTAGTTT', True),
    ]
    for s1_str, s2_str, exp1_str, exp2_str, also_reversed in cases:
        s1 = DNA.makeSequence(s1_str)
        s2 = DNA.makeSequence(s2_str)
        if exp1_str is None:
            exp1, exp2 = s1, s2
        else:
            exp1 = DNA.makeSequence(exp1_str)
            exp2 = DNA.makeSequence(exp2_str)
        self.assertEqual(align_two_seqs(s1, s2, f), (exp1, exp2))
        if also_reversed:
            # reversed order works as well
            self.assertEqual(align_two_seqs(s2, s1, f), (exp2, exp1))
def test_align_two_seqs_with_clustal(self):
    """align_two_seqs: simple pairwise cases using the clustal aligner.

    Only a few trivial cases are covered; this is not the place to test
    how the aligner itself behaves.
    """
    f = clustal_align_unaligned_seqs
    full = 'ACGTACGTACATACCCTGGTAGT'
    # Each case is (s1, s2, expected s1, expected s2). None for the
    # expected values means the inputs are expected back unchanged.
    cases = [
        # perfect alignment
        (full, full, None, None),
        # gap added to s2
        (full, 'ACGTACGTACATCCCTGGTAGT',
         full, 'ACGTACGTACAT-CCCTGGTAGT'),
        # gap added to s1
        ('ACGTACGTACATCCCTGGTAGT', full,
         'ACGTACGTACAT-CCCTGGTAGT', full),
        # single mismatch
        ('ACGTACGTACATTCCCTGGTAGT', full, None, None),
        # truncated sequence (3')
        (full, 'ACGTACGTACATACCCT',
         full, 'ACGTACGTACATACCCT------'),
        # truncated sequence (5')
        (full, 'CGTACATACCCTGGTAGT',
         full, '-----CGTACATACCCTGGTAGT'),
        # truncated sequence (5' and 3')
        (full, 'CGTACATACCCTGGT',
         full, '-----CGTACATACCCTGGT---'),
    ]
    for s1_str, s2_str, exp1_str, exp2_str in cases:
        s1 = DNA.makeSequence(s1_str)
        s2 = DNA.makeSequence(s2_str)
        if exp1_str is None:
            expected = (s1, s2)
        else:
            expected = (DNA.makeSequence(exp1_str),
                        DNA.makeSequence(exp2_str))
        self.assertEqual(align_two_seqs(s1, s2, f), expected)
def test_align_two_seqs_with_mafft(self):
    """align_two_seqs: simple pairwise cases using the mafft aligner.

    Only a few trivial cases are covered; this is not the place to test
    how the aligner itself behaves.
    """
    f = mafft_align_unaligned_seqs
    full = 'ACGTACGTACATACCCTGGTAGT'
    # Each case is (s1, s2, expected s1, expected s2). None for the
    # expected values means the inputs are expected back unchanged.
    cases = [
        # perfect alignment
        (full, full, None, None),
        # gap added to s2
        (full, 'ACGTACGTACATCCCTGGTAGT',
         full, 'ACGTACGTACAT-CCCTGGTAGT'),
        # gap added to s1
        ('ACGTACGTACATCCCTGGTAGT', full,
         'ACGTACGTACAT-CCCTGGTAGT', full),
        # single mismatch
        ('ACGTACGTACATTCCCTGGTAGT', full, None, None),
        # truncated sequence (3'); the expected gap run sits before the
        # final base rather than at the very end
        (full, 'ACGTACGTACATACCCT',
         full, 'ACGTACGTACATACCC------T'),
        # truncated sequence (5')
        (full, 'CGTACATACCCTGGTAGT',
         full, '-----CGTACATACCCTGGTAGT'),
        # truncated sequence (5' and 3'); the expected 3' gap run sits
        # before the final two bases
        (full, 'CGTACATACCCTGGT',
         full, '-----CGTACATACCCTG---GT'),
    ]
    for s1_str, s2_str, exp1_str, exp2_str in cases:
        s1 = DNA.makeSequence(s1_str)
        s2 = DNA.makeSequence(s2_str)
        if exp1_str is None:
            expected = (s1, s2)
        else:
            expected = (DNA.makeSequence(exp1_str),
                        DNA.makeSequence(exp2_str))
        self.assertEqual(align_two_seqs(s1, s2, f), expected)
def test_align_two_seqs_with_fake_aligner(self):
    """align_two_seqs: params are passed through to a custom aligner."""

    # A fake aligner that ignores its input sequences and returns the
    # string stored under params['res'] (or a run of ten A's when that
    # key is absent) for both template and candidate.
    def f(seqs, moltype, params={}):
        res = params.get('res', 'AAAAAAAAAA')
        pair = [('template', str(res)), ('candidate', str(res))]
        return LoadSeqs(data=pair, moltype=moltype, aligned=DenseAlignment)

    input_str = 'ACGTACGTACATACCCTGGTAGT'

    # No params supplied: the aligner's default result is returned.
    s1 = DNA.makeSequence(input_str)
    s2 = DNA.makeSequence(input_str)
    expected = (DNA.makeSequence('AAAAAAAAAA'),
                DNA.makeSequence('AAAAAAAAAA'))
    self.assertEqual(align_two_seqs(s1, s2, f), expected)

    # params={'res': 'BBB'}: the supplied value is returned instead.
    s1 = DNA.makeSequence(input_str)
    s2 = DNA.makeSequence(input_str)
    expected = (DNA.makeSequence('BBB'), DNA.makeSequence('BBB'))
    self.assertEqual(
        align_two_seqs(s1, s2, f, params={'res': 'BBB'}), expected)
def test_reintroduce_template_spacing_template(self):
    """reintroduce_template_spacing: template from the DeSantis2006 example."""
    template, pw_aligned_template, pw_aligned_candidate = [
        DNA.makeSequence(s) for s in (
            'ATAC-----GTA-AC----GTA---C---G-T-AC-GG',
            'ATACGT-A-ACGTACGTAC--GG',
            'C-ACGTTAAACGT-CGTACCCGG')]
    result = reintroduce_template_spacing(
        template, pw_aligned_template, pw_aligned_candidate)
    # Only the first element (the re-spaced template) is checked here.
    self.assertEqual(
        result[0],
        DNA.makeSequence('ATAC-----GT-A-AC----GTA---C---G-T-AC--GG'))
def test_reintroduce_template_spacing_candidate(self):
    """reintroduce_template_spacing: candidate from the DeSantis2006 example."""
    template, pw_aligned_template, pw_aligned_candidate = [
        DNA.makeSequence(s) for s in (
            'ATAC-----GTA-AC----GTA---C---G-T-AC-GG',
            'ATACGT-A-ACGTACGTAC--GG',
            'C-ACGTTAAACGT-CGTACCCGG')]
    result = reintroduce_template_spacing(
        template, pw_aligned_template, pw_aligned_candidate)
    # Only the second element (the re-spaced candidate) is checked here.
    self.assertEqual(
        result[1],
        DNA.makeSequence('C-AC-----GTTAAAC----GT----C---G-T-ACCCGG'))
def test_reintroduce_template_spacing_new_gaps(self):
    """reintroduce_template_spacing: new gaps from the DeSantis2006 example."""
    template, pw_aligned_template, pw_aligned_candidate = [
        DNA.makeSequence(s) for s in (
            'ATAC-----GTA-AC----GTA---C---G-T-AC-GG',
            'ATACGT-A-ACGTACGTAC--GG',
            'C-ACGTTAAACGT-CGTACCCGG')]
    result = reintroduce_template_spacing(
        template, pw_aligned_template, pw_aligned_candidate)
    # Only the third element (the list of new gap positions) is checked.
    self.assertEqual(result[2], [11, 36])
def test_reintroduce_template_spacing(self):
    """reintroduce_template_spacing: full result for the DeSantis2006 example."""
    template, pw_aligned_template, pw_aligned_candidate = [
        DNA.makeSequence(s) for s in (
            'ATAC-----GTA-AC----GTA---C---G-T-AC-GG',
            'ATACGT-A-ACGTACGTAC--GG',
            'C-ACGTTAAACGT-CGTACCCGG')]
    # Expected (template, candidate, new gap positions).
    expected = (
        DNA.makeSequence('ATAC-----GT-A-AC----GTA---C---G-T-AC--GG'),
        DNA.makeSequence('C-AC-----GTTAAAC----GT----C---G-T-ACCCGG'),
        [11, 36])
    result = reintroduce_template_spacing(
        template, pw_aligned_template, pw_aligned_candidate)
    self.assertEqual(result, expected)
def test_reintroduce_template_spacing_no_change(self):
    """reintroduce_template_spacing: inputs already matching the template are unchanged."""
    # The same sequence object is supplied for all three arguments.
    same = DNA.makeSequence('AT-CG')
    self.assertEqual(
        reintroduce_template_spacing(same, same, same),
        (same, same, []))

    # different seqs but pw alignment matches template pattern
    template = DNA.makeSequence('ATC-G')
    pw_aligned_template = DNA.makeSequence('ATC-G')
    pw_aligned_candidate = DNA.makeSequence('ATCCG')
    expected = (DNA.makeSequence('ATC-G'), DNA.makeSequence('ATCCG'), [])
    result = reintroduce_template_spacing(
        template, pw_aligned_template, pw_aligned_candidate)
    self.assertEqual(result, expected)
def test_reintroduce_template_spacing_middle(self):
    """reintroduce_template_spacing: changes at non-terminal positions."""
    # Each case is (template, pairwise-aligned template, pairwise-aligned
    # candidate, expected template, expected candidate, expected new gaps).
    cases = [
        ('GTA---C', 'GTAC', 'GT-C',
         'GTA---C', 'GT----C', []),
        ('ATAC-----GTA-AC', 'ATACGT-A-AC', 'C-ACGTTAAAC',
         'ATAC-----GT-A-AC', 'C-AC-----GTTAAAC', [11]),
        # single gap in new spot
        ('GTA-AC', 'GT-A-AC', 'GTTAAAC',
         'GT-A-AC', 'GTTAAAC', [2]),
        # existing gap extended
        ('AC-GG', 'AC--GG', 'ACCCGG',
         'AC--GG', 'ACCCGG', [2]),
    ]
    for t_str, pw_t_str, pw_c_str, exp_t_str, exp_c_str, exp_gaps in cases:
        template = DNA.makeSequence(t_str)
        pw_aligned_template = DNA.makeSequence(pw_t_str)
        pw_aligned_candidate = DNA.makeSequence(pw_c_str)
        expected = (DNA.makeSequence(exp_t_str),
                    DNA.makeSequence(exp_c_str),
                    exp_gaps)
        result = reintroduce_template_spacing(
            template, pw_aligned_template, pw_aligned_candidate)
        self.assertEqual(result, expected)
def test_reintroduce_template_spacing_leading_trailing_gaps_ignored(self):
    """reintroduce_template_spacing: terminal template gaps do not affect the result."""
    # The pairwise alignment and the expected result are the same for
    # every template; only the template's terminal gaps vary.
    templates = [
        '----AC-GG',   # leading gaps
        'AC-GG---',    # trailing gaps
        '-AC-GG---',   # leading/trailing gaps
    ]
    for template_str in templates:
        template = DNA.makeSequence(template_str)
        pw_aligned_template = DNA.makeSequence('AC--GG')
        pw_aligned_candidate = DNA.makeSequence('ACCCGG')
        expected = (DNA.makeSequence('AC--GG'),
                    DNA.makeSequence('ACCCGG'),
                    [2])
        result = reintroduce_template_spacing(
            template, pw_aligned_template, pw_aligned_candidate)
        self.assertEqual(result, expected)
def test_adjust_alignment_paper_example(self):
    """adjust_alignment: the worked example from DeSantis2006."""
    template = DNA.makeSequence(
        'ATAC-----GT-A-AC----GTA---C---G-T-AC--GG')
    candidate = DNA.makeSequence(
        'C-AC-----GTTAAAC----GT----C---G-T-ACCCGG')
    new_gaps = [11, 36]
    # Is there a typo in their example? They change GT-A-AC to GT-AAC,
    # but that doesn't really make sense given that the template
    # alignment is GTA-AC...
    expected = (
        DNA.makeSequence('ATAC-----GTA-AC----GTA---C---G-T-AC-GG'),
        DNA.makeSequence('C-AC----GTTAAAC----GT----C---G-TACCCGG'))
    result = adjust_alignment(template, candidate, new_gaps)
    self.assertEqual(result, expected)
def test_adjust_alignment(self):
    """adjust_alignment: simple adjustments handled as expected"""
    # Each row: (template, candidate, new_gaps,
    #            expected template, expected candidate)
    scenarios = [
        # remove a 3' gap
        ('AA-GGC---ATTAA', 'AATCCTT--AAAAA', [2],
         'AAGGC---ATTAA', 'AATCCTT-AAAAA'),
        # remove a 5' gap
        ('AA-GGC----TTAA', 'AATCCTT--AAAAA', [9],
         'AA-GGC---TTAA', 'AATCCTT-AAAAA'),
        # multiple gaps to remove
        ('AA-GGC----TTAA', 'AATCCTT--AAAAA', [2, 9],
         'AAGGC---TTAA', 'AATCCTTAAAAA'),
    ]
    for t_str, c_str, gap_positions, t_want, c_want in scenarios:
        result = adjust_alignment(
            DNA.makeSequence(t_str), DNA.makeSequence(c_str), gap_positions)
        wanted = (DNA.makeSequence(t_want), DNA.makeSequence(c_want))
        self.assertEqual(result, wanted)
def test_adjust_alignment_multiple_adjancent_new_gaps(self):
    """adjust_alignment: multiple adjacent new gaps handled as expected"""
    # Each row: (template, candidate, new_gaps,
    #            expected template, expected candidate)
    scenarios = [
        ('AA--GC---ATTAA', 'AATCCTT--AAAAA', [2, 3],
         'AAGC---ATTAA', 'AATCCTTAAAAA'),
        ('AATTGCG---CAT', 'AA---CTTTTAAA', [7, 8, 9],
         'AATTGCGCAT', 'AACTTTTAAA'),
        ('AATTGCG---CAT', 'AA-CTTTTTA-A-', [7, 8, 9],
         'AATTGCGCAT', 'AACTTTTTAA'),
    ]
    for t_str, c_str, gap_positions, t_want, c_want in scenarios:
        result = adjust_alignment(
            DNA.makeSequence(t_str), DNA.makeSequence(c_str), gap_positions)
        wanted = (DNA.makeSequence(t_want), DNA.makeSequence(c_want))
        self.assertEqual(result, wanted)
def test_nearest_gap(self):
    """nearest_gap: functions with single gap in seq"""
    # With exactly one gap, every query position must resolve to that gap.
    single_gap_cases = (
        ('AAA-AAAA', 3),
        ('-ACGTACGT', 0),
        ('ACGTACGT-', 8),
    )
    for sequence, gap_index in single_gap_cases:
        for query_pos, _ in enumerate(sequence):
            self.assertEqual(nearest_gap(sequence, query_pos), gap_index)
def test_nearest_gap_mutliple_gaps(self):
    """nearest_gap: handles multiple gaps in same sequence"""
    sequence = 'ACG-TT-AACC--TAAT'
    # expected_gaps[i] is the gap index expected for query position i.
    expected_gaps = [3, 3, 3, 3, 3,
                     6, 6, 6, 6,
                     11, 11, 11,
                     12, 12, 12, 12, 12]
    for query_pos, gap_index in enumerate(expected_gaps):
        self.assertEqual(nearest_gap(sequence, query_pos), gap_index)
def test_nearest_gap_ambiguous(self):
    """nearest_gap: handles ambiguous cases by choosing the 5' position

    Not certain that this is how ties should be handled... Maybe
    revisit by seeing which choice gives the better alignment?
    """
    sequence = '-A-A-A-'
    # Each queried base sits between two equidistant gaps; the
    # lower-index gap is the one expected.
    for query_pos, gap_index in ((1, 0), (3, 2), (5, 4)):
        self.assertEqual(nearest_gap(sequence, query_pos), gap_index)
def test_nearest_gap_handles_error(self):
    """nearest_gap: errors are handled correctly"""
    # Each row: (expected exception, sequence, query position)
    failing_calls = (
        # positions outside the sequence
        (IndexError, 'AA-AAA', 22),
        (IndexError, 'AA-AAA', -1),
        # sequence without any gap character
        (UnalignableSequenceError, 'AAA', 1),
    )
    for error_type, sequence, query_pos in failing_calls:
        self.assertRaises(error_type, nearest_gap, sequence, query_pos)
def test_introduce_terminal_gaps_simple(self):
    """introduce_terminal_gaps: functions as expected"""
    # Each row: (template, aligned template, aligned candidate,
    #            expected candidate)
    scenarios = [
        # no terminal gaps
        ('AAA', 'AAA', 'AAA', 'AAA'),
        # 5' terminal gaps only
        ('-AAA', 'AAA', 'AAA', '-AAA'),
        ('-----AAA', 'AAA', 'AAA', '-----AAA'),
        # 3' terminal gaps only
        ('ACG--', 'AAA', 'ACG', 'ACG--'),
        ('ACCTG----', 'ACCTG', 'ACGGG', 'ACGGG----'),
        # 5' and 3' terminal gaps
        ('---AC--CTG----', 'AC--CTG', 'ACTTGGG', '---ACTTGGG----'),
    ]
    for t_str, at_str, ac_str, wanted_str in scenarios:
        result = introduce_terminal_gaps(
            DNA.makeSequence(t_str, Name='t'),
            DNA.makeSequence(at_str, Name='at'),
            DNA.makeSequence(ac_str, Name='ac'))
        self.assertEqual(result, DNA.makeSequence(wanted_str, Name='ac'))
def test_introduce_terminal_gaps_existing_terminal_template_gaps(self):
    """introduce_terminal_gaps: aligned template already has terminal gaps"""
    # Each row: (template, aligned template, aligned candidate,
    #            expected candidate)
    scenarios = [
        # one 5' gap in aligned_template
        ('---AAA', '-AAA', 'AAAA', '--AAAA'),
        # multiple 5' gaps in aligned_template
        ('---AAA', '---AAA', 'AAAAAA', 'AAAAAA'),
        # one 3' gap in aligned_template
        ('AAA---', 'AAA-', 'AAAA', 'AAAA--'),
        # multiple 3' gaps in aligned_template
        ('AAA---', 'AAA---', 'AAAAAA', 'AAAAAA'),
        # 5 prime, 3 prime gaps in aligned_template
        ('--CAA---', '-CAA-', 'GCAAT', '-GCAAT--'),
        # internal, 5', 3' gaps
        ('--CATA---', '-CATA-', 'GCA-AT', '-GCA-AT--'),
    ]
    for t_str, at_str, ac_str, wanted_str in scenarios:
        result = introduce_terminal_gaps(
            DNA.makeSequence(t_str, Name='t'),
            DNA.makeSequence(at_str, Name='at'),
            DNA.makeSequence(ac_str, Name='ac'))
        self.assertEqual(result, DNA.makeSequence(wanted_str, Name='ac'))
def test_remove_template_terminal_gaps(self):
    """remove_template_terminal_gaps: functions as expected"""

    def named(bases, label):
        # Shorthand for building a named DNA sequence.
        return DNA.makeSequence(bases, Name=label)

    def check(cand, templ, wanted):
        # Names are compared first so that a naming mismatch is
        # reported separately from a sequence mismatch.
        result = remove_template_terminal_gaps(cand, templ)
        self.assertEqual(result[0].Name, wanted[0].Name)
        self.assertEqual(result[1].Name, wanted[1].Name)
        self.assertEqual(result, wanted)

    # no template terminal gaps: the template object itself is expected back
    cand = named('--CGTTGG-', 'c')
    templ = named('ACCGT-GGA', 't')
    check(cand, templ, (named('--CGTTGG-', 'c 1..6'), templ))

    # empty sequences: both inputs are expected back
    cand = named('', 'c')
    templ = named('', 't')
    check(cand, templ, (cand, templ))

    # 5' template terminal gaps
    check(named('ACCGTTGGA', 'c'), named('--CGT-GGA', 't'),
          (named('CGTTGGA', 'c 3..9'), named('CGT-GGA', 't')))
    check(named('ACCGTTGGA', 'c'), named('-CCGT-GGA', 't'),
          (named('CCGTTGGA', 'c 2..9'), named('CCGT-GGA', 't')))

    # 3' template terminal gaps
    check(named('ACCGTTGGA', 'c'), named('ACCGT-GG-', 't'),
          (named('ACCGTTGG', 'c 1..8'), named('ACCGT-GG', 't')))
    check(named('ACCGTTGGA', 'c'), named('ACCGT-G--', 't'),
          (named('ACCGTTG', 'c 1..7'), named('ACCGT-G', 't')))

    # 5' and 3' template terminal gaps
    check(named('ACCGTTGGA', 'c'), named('--CGT-GG-', 't'),
          (named('CGTTGG', 'c 3..8'), named('CGT-GG', 't')))

    # name constructed correctly when contains RC
    check(named('ACCGTTGGA', 'c RC'), named('--CGT-GG-', 't'),
          (named('CGTTGG', 'c RC:3..8'), named('CGT-GG', 't')))

    # ValueError on unaligned seqs (candidate and template lengths differ)
    self.assertRaises(ValueError, remove_template_terminal_gaps,
                      named('ACCGTTGGA', 'c'), named('-CGT-GG-', 'ct'))
def test_pynast_seq_3037(self):
    """pynast_seq: uclust as pairwise aligner fixes problematic bl2seq alignment

    Strange alignment issues were found with this sequence in
    PyNAST 1.0. This tests that a good alignment is achieved
    with this sequence in later versions.
    """
    reference = LoadSeqs(data=template_128453.split('\n'))
    result = pynast_seq(
        query_3037,
        reference,
        min_len=150,
        align_unaligned_seqs_f=None)
    self.assertEqual(result, ('128453', aligned_3037))
query_3037 = DNA.makeSequence("CTGGGCCGTGTCTCAGTCCCAGTGTGGCTGATCATCCTCTCAGACCAGCTAAGGATCGTCGCCTTGGTGCGCCTTTACCACACCAACTAGCTAAAGGCGATAAATCTTTGATCTCGCGATATCATCCGGTATTAGCAGCAATTTCTCGCTGTTATTCCGAACCTGAGGGCAGATTCCCACGCGTTACGCACCCGTGCGCCACTAAGGCCG",Name=">v15D30.1.08_100583")
# NOTE(review): this statement looks truncated -- the call's opening
# parenthesis, the aligned sequence literal and the start of the `Name`
# keyword appear to be missing, so the line is not valid Python as written.
# TODO: restore the full expected alignment from the upstream test file.
aligned_3037 = DNA.makeSequenceame="v15D30.1.08_100583 1..210")
template_128453 = """>128453
------------------------------------------------------------------------------------------------------AACTTGAGAGTTT-GA--T-TC-T-G-GCTC-AG-AA-CGAA-C-GC--TGG-C--G-GC-A-TG--C----T-T--AACACA-T-GC-A-AGT-CGA-A-CGA---------A-G------------------------------------------GC----------------------------------------------------TTC-G----------------------------------------------------------------------------------GC-------------------------------CT--T--AG-T-GG-C-GC-A--C-------------GGG-TGCGT-A--AC-GC-G-T-G-GG---A-A--T-CT-G--C-C-TTC--AG-G------------------------------------------------------------------T-AC----GGA-AT-AA-CTA-------------------------G-G-G-----------------------GAA-A---CTC-GAG-CTAA-TA---CC-G--T-AT-G----------A--------------------T-------------------------------------AT-C-----------------------------------------------------------------------------------------------------------------------G-AG-A--------------------------------------------------------------------------------------------------------------------------------------G-A-T---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CAAA--G-A----------------------------------------------------------------------------------------------------------------------------------------TTT-A----------------------------------------------------------------------------------------------------------------------------------T---C-G--------------C----C-T---G-AA-G---AT---G-A-----G-CCC-GCG--T-TGG--A------TT--A--G-CT-A----G---TTGG-T-A-GG-G-T----AAA-GG-C-T-T-ACCA--A-GG-C-G--A-CG-A------------TCC-A-T------AG-CT-G-G-TCT-G-AG----A--GG-AT--G-AT-C-AG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TG---GG-G-A-ATA-TTGGA-C-AA-T-GG--GG-GA-A----A-C-CC-T-GA-TC-CA-GCAA-TGCC-G-CG-T---G-A-G--T--GA-A-G-
-A--A-G-G-CC-----TT-AG---------G-G-T-T-G-T--A---AA-G-CTC--------TT-TT-A-C--C-CGG----GA-T--G---A-----------------------T--AA------------------------------T-GA-CA-GT-A-C-CG-G-GA-G---------AA-----------TAAGC-TCC-GG-C-TAA---C--T-CCGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GG-AG-GGA-GCT-A-G-CG-TTGT-T-CGG-AA-TT-A--C-T--GGGC-GTA----AA-GCGT-AC--G-TA-G-G-C-G------------G--T-TT-A-A-T-AA----G-T-C-A---G-GGG-TG-A-AA-GC--CC-AGA-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CT-C-T-GG-AA-C----T-G-C-C-T-T--------T--GA-G-A-C-T-G-TTA--G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-A-G-A-A-C-A-----T-AG--AA-G-A------------G-GT-A-AG-T----GG--AATT-CCG-A-GT--GT-A-GAG-GTGAAA-TT-CGT-AGAT-A-TT-C-GGA--AG-A-AC-A-CC-AG--T--G--GC-GAA-G--G-C---G----A--C-T-TACTG------G-TC-TA--------------------------------------------------------------TA-G-T-T--GA--CG-----CT-GA-GG--T
-A-CGA--AA-G-C--------------G-TGGG-TAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-AT--AA-CT---------A-GC--T--G-T-CC-G-GG-T--A--------------------------------------------------------------------------------------CAT-GG--------------------------------------------------------------------------------------------------------------------------------------------------T-A-T-CT--G-G-G-T-GG-C------GG--A----GC-TAA--CG-C-A-T--T--AA-GT--T----A-TCC-GCC-T-G-GG-GAG-TA---CGG-----T-C--G-C-A-A-GAT-T--AAA-ACTC-AAA---------GAAA-TTG-ACGGG-G-G-CCTG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-C-AG-A-A-CC-TT-A-CC-AGCGT-TT-G-AC-A-T-C-------------CTGA-T-C-------------G-CG-G-AAA--GT--G-GA-G-A-C--A-C-A-TT-C-T-T--T-C-----AG-------------------------------------T--TC-GG-----------------------------------------CT----G--------GA-TCA-G-A--GA---------------------------------------------------C-A-G-G-T-GCTG-CA-TGG-CT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-CA--CC--T-CTAG--T-T-G-C-C---AT-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TAAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---C-A--CT---------------T-T-A-G-A-GG-A--AC-T-G-CCG--G-T------------------------------------G-A---TAA----------------------------------G-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAGT-C---CTC-A-T-G-G-C-C-CTT----AC-G--CG-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-CGGT-G-A--C-AGA-GG-GC------------------------------------------------------------
--------------------------------------C-G-C-A-A--G-CCTG-C--A---------------------------------------A-AG-G-T-----------T--A-G-CT---A----------A--TCT-C--------A-AAAAG-CC-G-T-C-T-CAG-TTC--------GGA-T-TGTTC-TC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-AGAGC-A-T-G-AA-G-GC-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AAT-ACGT-T-CCCAGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-TG-GG-A--G---TTG-G-AT-TC-ACC--C-GAA------G--G-CGC-TG-C-G-C-T-AA-C-C-C-----------------------------------------------------------G-CA-A---------------------------------------------------------------------------------------------------G--GG-A--GG-C--A---GG-CGA--CC--ACG-G----T-GGG-TT-TAG------------------------CG--ACT-GGGG-TG-AAG-TCGTAACAA-GGTAG-CCGT-AGGGGAA-CCTG-CGGC-TGGATCACCTCCTTTCTAAGGA---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"""
# Small five-sequence template alignment fixture, loaded as a
# DenseAlignment of DNA.
db_aln2 = LoadSeqs(
    data={
        '1': 'ACGT--ACGTAC-ATA-C-----CC-T-G-GTA-G-T---',
        '2': 'AGGTTTACGTAG-ATA-C-----CC-T-G-GTA-G-T---',
        '3': 'AGGTACT-CCAC-ATA-C-----CC-T-G-GTA-G-T---',
        '4': 'TCGTTCGT-----ATA-C-----CC-T-G-GTA-G-T---',
        '5': 'ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---',
    },
    moltype=DNA,
    aligned=DenseAlignment)
template_14990_trimmed = """>14990_5_and_3_prime_lost_four_bases_each
--------------------------------------------------------------------------------------------------------------------------------------AG-GA-CGAA-C-GC--TGG-C--G-GC-G-TG--C----C-T--AATACA-T-GC-A-AGT-CGA-G-CGG---------A-A---ATTTTA--------------------------TTGG---TG----------------------------------------------------CTT-G----------------------------------------------------------------------------------CAC-CTT-------------------TAAAAT-TT--T--AG-C-GGCG-G--A--C-------------GGG-TGAGT-A--AC-AC-G-T-G-GG---TAA--C-CTAC--C-T--TA--TA-G------------------------------------------------------------------A-TT----GGG-AT-AA-CTC-------------------------C-G-G-----------------------GAA-A---CCG-GGG-CTAATAC---CG-A----AT-A---------------------------------A-TA-C-T--T--T----------------TTA---AC-------------------------------------------------------------------------------------------------------------------------A-CA-T--------------------------------------------------------------------------------------------------------------------------------------G-T-T--TGA---------------A--A---G-T-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-A-C-GG-----T-----T-----------------------------------------------------------------------------------------------------------------------TCG--------------------------------------------------------------------------------------------------------------------------G--C--TG--T---C-A--------------C----T-A---T-AA-G---AT---G-G-----A-CCC-GCG--G-CGC--A------TT--A--G-CT-A----G---TTGG-T-G-AG-G-T----AAC-GG-C-T-C-ACCA--A-GG-C-A--A-CG-A------------TGC-G-T------AG-CC-G-A-CCT-G-AG----A--GG-GT--G-AT-C-GG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TA---GG-G-A-ATC-TTCCA-C-AA-T-GG--AC-GA-A----A-G-TC-T-GA-TG-GA-GCAA-CGCC-G-CG-T---G-A-G--T--GA-A-G-
-A--A-G-G-AT-----TT-CG---------G-T-T-C-G-T--A---AA-A-CTC--------TG-TT-G-C--A-AGG----GA-A--G---AACAAGT---AGCG-TA----G--T--AA-C---T----G-----G--C-GCT-ACC-TT-GA-CG-GT-A-C-CT-T-GT-T---------AG-----------AAAGC-CAC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GTG-GCA-A-G-CG-TTGT-C-CGG-AA-TT-A--T-T--GGGC-GTA----AA-GCGC-GC--G-CA-G-G-T-G------------G--T-TC-C-T-T-AA----G-T-C-T---G-ATG-TG-A-AA-GC--CC-CCG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-G-G-GG-AG------G-GTC-A-T-T--------G--GA-A-A-C-T-G-GGG--A-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-G-----C-AG--AA-G-A------------G-GA-T-AG-T----GG--AATT-CCA-A-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TT-T-GGA--GG-A-AC-A-CC-AG--T--G--GC-GAA-G--G-C---G----A--C-T-GTCTG------G-TC-TG--------------------------------------------------------------TA-A-C-T--GA--CA-----CT-GA-GG--C
-G-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-AG--TG-CT---------A-AG--T--G-T-TG-G-GG-G--G--T------------------------------------------------------------------------------------TT-CC----------------------------------------------------------------------------------------------------------------------------------------------G---C-C-C-CT--C-A-G-T-GC-T------GC--A----GC-TAA--CG-C-A-T--T--AA-GC--A----C-TCC-GCC-T-G-GG-GAG-TA---CGG-----T-C--G-C-A-A-GAC-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AGGTC-TT-G-AC-A-TCC--------------CGG-T-G-------------A-CC-A-C-T--AT--G-GA-G-A-C--A-T-A--G-T-T-T--C-C-----CC-------------------------------------T--TC-G------------------------------------------GG----G----G--CAA-CGG---T--GA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-TT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TA--TT--C-TTAG--T-T-G-C-C---AT-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TCAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---C-A--CT---------------C-T-A-A-G-GA-G--AC-T-G-CCG--G-T------------------------------------G-A---TAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAAT-C---ATC-A-T-G-C-C-C-CTT----AT-G--AC-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-ACGG-T-A--C-AAA-CG-GT------------------------------------------------------------
--------------------------------------T-G-C-C-A--A-CCCG-C--G---------------------------------------A-GG-G-G-----------G--A-G-CT---A----------A--TCC-G------A-T-AAAAC-CG-T-T-C-T-CAG-TTC--------GGA-T-TGTAG-GC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-CCTAC-A-T-G-AA-G-CC-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AAT-ACGT-T-CCCGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-CG-AG-A--G---TTT-G-TA-AC-ACC--C-GAA------G--T-CGG-TG-A-G-G-T-AA-C-C-T-----------------------------------------------------------T-TA-----------------------------------------------------------------------------------------------------T--GG-A-C-C-C--A---CC-CGC--CG--AAG-G----T-GGG-AT-AAA------------------------TA--ATT-GGGG-TG-AAT-TCTTAACAA-GGTAC-CCGT-ATCGGAA-GGTG-CGGC-TGG------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
"""
# FASTA-formatted text holding a single record: a '>14990' header line
# followed by one ungapped nucleotide sequence line. The literal has no
# trailing newline.
# NOTE(review): presumably the untrimmed counterpart of the '14990' entry in
# template_14990_trimmed -- confirm against the tests that use it.
input_seq_14990 = """>14990
GCTCAGGACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGGAAATTTTATTGGTGCTTGCACCTTTAAAATTTTAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTACCTTATAGATTGGGATAACTCCGGGAAACCGGGGCTAATACCGAATAATACTTTTTAACACATGTTTGAAAGTTGAAAGACGGTTTCGGCTGTCACTATAAGATGGACCCGCGGCGCATTAGCTAGTTGGTGAGGTAACGGCTCACCAAGGCAACGATGCGTAGCCGACCTGAGAGGGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGTGAAGAAGGATTTCGGTTCGTAAAACTCTGTTGCAAGGGAAGAACAAGTAGCGTAGTAACTGGCGCTACCTTGACGGTACCTTGTTAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGTGGTTCCTTAAGTCTGATGTGAAAGCCCCCGGCTCAACCGGGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGATAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTGTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTGGGGGGTTTCCGCCCCTCAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCGGTGACCACTATGGAGACATAGTTTCCCCTTCGGGGGCAACGGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTATTCTTAGTTGCCATCATTCAGTTGGGCACTCTAAGGAGACTGCCGGTGATAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAACGGTTGCCAACCCGCGAGGGGGAGCTAATCCGATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTATGGACCCACCCGCCGAAGGTGGGATAAATAATTGGGGTGAATTCTTAACAAGGTACCCGTATCGGAAGGTGCGGCTGGATCA"""
# FASTA-formatted text holding a single record: a '>10116' header line
# followed by one ungapped nucleotide sequence line. The literal has no
# trailing newline.
input_seq_10116 = """>10116
CTGGTCCGTGTCTCAGTACCAGTGTGGGGGACCTTCCTCTCAGAACCCCTACGCATCGTCGCCTTGGTGGGCCGTTACCCCACCAACTATCTAATCAGACGCGAGCCCATCTCTGAGCGAATTTCTTTGATATTCAAATCATGCGATTTAAATATGTTATGAGGTATTACCATCCGTTTCCAGAAGCTATCCCTCTCTCAGAGGCAGGTTGCTCACGTGTTACTCACCCGTTCGCCACTCAACTCTTCATCGGTGAGTGCAAGCACTCGGTGATGAAGAAGTTTCGTTCGACTTGCATGTATTAGGCACGCCGCCAGCGTTCATCCTGAGCCAGGATCAAACTCTG"""
# One-element list of (label, sequence) tuples. The label is the full FASTA
# header text 'FAKE1 here is some desc.73602 tag1;tag2, tag3:tag4' (the same
# header as the first record of input_seqs_gaps) and the sequence is ungapped.
# NOTE(review): presumably the expected "failed to align" result for the FAKE1
# record with its gap characters removed -- confirm against the test using it.
expected_fail1 = [('FAKE1 here is some desc.73602 tag1;tag2, tag3:tag4',\
'AGGCGGCTACCTGGACCAACACTGACACTGAGGCACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCCTAAACGATGCGAACTGGATGTTGGGTGCAATTTGGCACGCAGTATCGAAGCTAACGCGTTAAGTTCGCCGCCTGGGGAGTACGGTCGCAAGACTTAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGTATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTGGTCTTGACATCCACGGAACTTTCCATAGATGGATTGGTGCCTTCGGGAACCGTGAGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTCCTTAGTTGCCAGCACGTAATGGTGGGAACTCTAAGGAGACCGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAGTCATCATGGCCCTTAGGGGACCAGGGCTACACACGTACTACAATGGTAGGGACAGAGGGCTGCAAACCCGCGAGGGCAAGCCAATCCCAGAAACCCTATCTCAGTCCGGATTGGAGTTTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCAGATCAGCATTGCTGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTTTGTTGCACCAGAAGCAGGTAGCTTAACCTTCGGGAGGGCGCTCACGGTGTGGCCGATGACTGGGGTGAAGTCGTAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCTCCTTTTGAGCATGACGTCATCGTCCTGTCGGGCGTCCTCACAAATTACCTGCATTCAGAGATGCGTATCGGCACAGGCCGGTATGCGAAAGTCCCATCATGGGGCCTTAGCTCAGCTGGGAGAGCACCTGCTTTGCAAGCAGGGGGTCGTCGGTTCGATCCCGACAGGCTCCACCATTTGAGTGAAACGACTTTGGGTCTGTAGCTCAGGTGGTTAGAGCGCACCCCTGATAAGGGTGAGGTCGGTGGTTCGAGTCCTCCCAGACCCACCACTCTGAATGTAGTGCACACTTAAGAATTTATATGGCTCAGCGTTGAGGCTGAGACATGTTCTTTTATAACTTGTGACGTAGCGAGCGTTTGAGATATCTATCTAAACGTGTCGTTGAGGCTAAGGCGGGGACTTCGAGTCCCTAAATAATTGAGTCGTATGTTCGCGTTGGTGGCTTTGTACCCCACACAACACGGCGTATGGCCCCGAGGCAACTTGGGGTTATATGGTCAAGCGAATAAGCGCACACGGTGGATGCCTAGGCGGTCAGAGGCGATGAAGGACGTGGTAGCCTGCGAAAAGTGTCGGGGAGCTGGCAACAAGCTTTGATCCGGCAATATCCGAATGGGGAAACCCGG')]
# FASTA-formatted text holding a single record whose header line carries both
# an identifier and a free-text description
# ('AKIW1129_fasta.screen.Contig1 description field'), followed by one
# ungapped nucleotide sequence line. The literal has no trailing newline.
input_seqs2 = """>AKIW1129_fasta.screen.Contig1 description field
GAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGAATGACAGAGGAGCTTGCTCCTCTCGATTTAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTGCCTTATAGCTTGGGATAACTCCGGGAAACCGGGGCTAATACCGAATAATACTTTTGGACACATGTTCGAAAGTTGAAAGATGGTTCTGCTATCACTATAAGATGGACCCGCGCTGCATTAGCTAGTTGGTGAGGTAACGGCTCACCAAGGCCACGATGCATAGCCGACCTGAGAGGGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGTGAAGAAGGATTTCGGTTCGTAAAACTCTGTTGTAAGGGAAGAACAAGTACAGTAGTAACTGGCTGTACCTTGACGGTACCTTATTAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGTGGTCCTTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGGACTTGAGTGCAGAAGAGGATAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTGTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTGGGGGGTTTCCGCCCCTCAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCATTGACCACTGTAGAGATACAGTTTTCCCTTCGGGGACAACGGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTTCTTAGTTGCCATCATTTAGTTGGGCACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAACGGTTGCCAACCCGCGAGGGGGAGCTAATCCGATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTTGGAGCCAGCCGCCGAAGGTGGGATAGATGATTGGGGTGAAGTCGTAACAAGGT"""
# FASTA-formatted text holding two records whose sequences contain runs of
# '-' gap characters. The first record (header 'FAKE1 here is some
# desc.73602 tag1;tag2, tag3:tag4') is wrapped across five sequence lines;
# the second (header 'AKIW1129_fasta.screen.Contig1 description field') is on
# a single line. The literal ends with a trailing newline.
# NOTE(review): presumably exercises handling of gapped input sequences --
# confirm against the tests that use it.
input_seqs_gaps = """>FAKE1 here is some desc.73602 tag1;tag2, tag3:tag4
AGGCGGCTACCTGGACCAACACTGACACTGAGGCACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCCTAAACGATGCGAACTGGATGTTGGGTGCAATTTGGCACGCAGTATCGAAGCTAACGCGTTAAGTTCGCCGCCTGGGGA
GTACGGTCGCAAGACTTAAA-----CTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGTATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTGGTCTTGACATCCACGGAACTTTCCATAGATGGATTGGTGCCTTCGGGAACCGTGAGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCC
TTGTCCTTAGTTGCCAGCACGTAAT---------GGTGGGAACTCTAAGGAGACCGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAGTCATCATGGCCCTTAGGGGACCAGGGCTACACACGTACTACAATGGTA-GGGACAGAGGGCTGCAAACCCGCGAGGGCAAGCCAATCCCAGAAACCCTATCTCAGTCCGGATTGGAGTTTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCAGATCAGCATTGCTGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTTTGTTGCACCAGAA
GCAGGTAGCTTAACCTTCGGGAGGGCGCTCACGGTGTGGCCGATGACTGGGGTGAAGTCGTAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCTCCTTTTGAGCATGACGTCATCGTCCTGTCGGGCGTCCTCACAAATTACCTGCATTCAGAGATGCGTATCGGCACAGGCCGGTATGCGAAAGTCCCATCATGGGGCCTTAGCTCAGCTGGGAGAGCACCTGCTTTGCAAGCAGGGGGTCGTCGGTTCGATCCCGACAGGCTCCACCATTTGAGTGAAACGACTTTGGGTCTGTAGCTCAGGTGGTTAGAGCGCACCCCTGATAAGGGTGAGGTCGGTGGTTCGAGTCCTCCC-----------------AGACCCACCACTCTGAATGTAGTGCACACTTAAGAATTTATATGGCTCAGCGTTGAGGCTGAGACATGTTCTTTTATAACTTGTGACGTAGCGAGCGTTTGAGATATCTATCTAAACGTGTCGTTGAGGCTAAGGCGGGGACTTCGAGTCCCTAAATAATTGAGTCGTATGTTCGCGTTGGTGGCTTTGTACCCCACACAACACGGCGTATGGCCCCG--AGGCAACTTGGGGT
TATATGGTCAAGCGAATAAGCGCACACGGTGGATGCCTAGGCGGTCAGAGGCGA----TGAAGGACGTGGTAGCCTGCGAAAAGTGTCGGGGAGCTGGCAACAAGCTTTGATCCGGCAATATCCGAATGGGGAAACCCGG
>AKIW1129_fasta.screen.Contig1 description field
GAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAG-------------CGAATGACAGAGGAGCTTGCTCCTCTCGATTTAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTGCCTTATAGCTTGGGATAACTCCGGGAAACCGGGGCTAATAC-CGAATAATACTTTTGGACACATGTTCGAAAGTTGAAAGATGGTTCTGCTATCACTATAAGATGGACCCGCGCTGCATTAGCTAGTTGGTGAGGTAACGGCTCACCAAGG-CCACGATGCATAGCCGACCTGAGAGGGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGACGAAAGT------CTGATGGAGCAACGCCGCGTGAGTGAAGAAGGATTTCGGTTCGTAAAACTCTGTTGTAAGGGAAGAACAAGTACAGTAGTAACTGGCTGTACCTTGACGGTACCTTATTAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGTGGTCCTT--------TAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGGACTTGAGTGCAGAAGAGGATAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTGTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTGGGGGGTTTCCGCCCCTCAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCAT-------------TGACCACTGTAGAGATACAGTTTTCCCTTCGGGGACAACGGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGC----AACCCTTGTTCTTAGTTGCCATCATTTAGTTGGGCACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCT-GGGCTACACACGTGCTACAATGGACGGTACAAACGGTTGCCAACCCGCGAGGGGGAGCTAATCCGATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTAC-------ATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCG---------AAGTCGGTGAGGTAACCTTTTGGAGCCAGCCGCCGAAGGTGGGATAGATGATTGGGGTGAAGTCGTAACAAGGT
"""
# Expected log text: two newline-terminated, tab-separated records. The
# first record has six fields (the third is empty); the second has three
# fields ending in the message 'No search results.'.
expected_logfile_contents = (
    "1\t23\t\t5\t100.00\t23\n"
    "2\t2\tNo search results.\n"
)
# Expected log text: a single newline-terminated, tab-separated record
# whose last field is the message 'No search results.'.
expected_stringent_logfile_contents = "1\t23\tNo search results.\n"
pynast_test_template_fasta1 = """>128618
----------------------------------------------------------------------------------------------------------GGAGAGTTT-GA--T-CC-T-G-GCTC-AG-GA-CGAA-C-GC--TGG-C--G-GC-G-TG--C----C-T--AATACA-T-GC-A-AGT-CGA-G-CGG---------A-C---CG-A----------------------------CGGG---AG----------------------------------------------------CTT-G----------------------------------------------------------------------------------CTC-TCT-------------------TA--G--GT--C--AG-C-GGCG-G--A--C-------------GGG-TGAGT-A--AC-AC-G-T-G-GG---TAA--C-CTGC--C-T--GT--AA-G------------------------------------------------------------------A-CT----GGG-AT-AA-CTC-------------------------C-G-G-----------------------GAA-A---CCG-GGG-CTAATAC---CG-G----AT-G---------------------------------C-TT-G-A--T--T----------------GAA---CC-------------------------------------------------------------------------------------------------------------------------G-CA-T--------------------------------------------------------------------------------------------------------------------------------------G-G-T--TCC---------------A--A--TC-A-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AAAA--G-G-T-GG-----C-----T----------------------------------------------------------------------------------------------------------------------TTCA--------------------------------------------------------------------------------------------------------------------------G--C--TA--C---C-A--------------C----T-T---A-CA-G---AT---G-G-----A-CCC-GCG--G-CGC--A------TT--A--G-CT-A----G---TTGG-T-G-AG-G-T----AAC-GG-C-T-C-ACCA--A-GG-C-G--A-CG-A------------TGC-G-T------AG-CC-G-A-CCT-G-AG----A--GG-GT--G-AT-C-GG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TA---GG-G-A-ATC-TTCCG-C-AA-T-GG--AC-GA-A----A-G-TC-T-GA-CG-GA-GCAA-CGCC-G-CG-T---G-A-G--T--GA-T-G-
-A--A-G-G-TT-----TT-CG---------G-A-T-C-G-T--A---AA-A-CTC--------TG-TT-G-T--T-AGG----GA-A--G---AACAAGT---ACCG-TT----C--G--AA-T---A----G-----GG-C-GGT-ACC-TT-GA-CG-GT-A-C-CT-A-AC-C---------AG-----------AAAGC-CAC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GTG-GCA-A-G-CG-TTGT-C-CGG-AA-TT-A--T-T--GGGC-GTA----AA-GCGC-GC--G-CA-G-G-C-G------------G--T-TT-C-T-T-AA----G-T-C-T---G-ATG-TG-A-AA-GC--CC-CCG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-G-G-GG-AG------G-GTC-A-T-T--------G--GA-A-A-C-T-G-GGG--A-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-G-----C-AG--AA-G-A------------G-GA-G-AG-T----GG--AATT-CCA-C-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TG-T-GGA--GG-A-AC-A-CC-AG--T--G--GC-GAA-G--G-C---G----A--C-T-CTCTG------G-TC-TG--------------------------------------------------------------TA-A-C-T--GA--CG-----CT-GA-GG--C
-G-CGA--AA-G-C--------------G-TGGG-GAG-C-G-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-AG--TG-CT---------A-AG--T--G-T-TA-G-AG-G--G--T------------------------------------------------------------------------------------TT-CC----------------------------------------------------------------------------------------------------------------------------------------------G---C-C-C-TT--T-A-G-T-GC-T------GC--A----GC-AAA--CG-C-A-T--T--AA-GC--A----C-TCC-GCC-T-G-GG-GAG-TA---CGG-----T-C--G-C-A-A-GAC-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AGGTC-TT-G-AC-A-T-C--------------CTC-T-G-------------A-CA-A-C-C--CT--A-GA-G-A-T--A-G-G--G-C-T-T--C-C-----CC-------------------------------------T--TC-G------------------------------------------GG----G----G---CA-GAG---T--GA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-TT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TG--AT--C-TTAG--T-T-G-C-C---AG-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TCAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---C-A--CT---------------C-T-A-A-G-GT-G--AC-T-G-CCG--G-T------------------------------------G-A---CAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAAT-C---ATC-A-T-G-C-C-C-CTT----AT-G--AC-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-GCAG-A-A--C-AAA-GG-GC------------------------------------------------------------
--------------------------------------A-G-C-G-A--A-GCCG-C--G---------------------------------------A-GG-C-T-----------A--A-G-CC---A----------A--TCC-C------A-C-AAATC-TG-T-T-C-T-CAG-TTC--------GGA-T-CGCAG-TC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-ACTGC-G-T-G-AA-G-CT-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AAT-ACGT-T-CCCGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-CG-AG-A--G---TTT-G-TA-AC-ACC--C-GAA------G--T-CGG-TG-A-G-G-T-AA-C-C-T-----------------------------------------------------------T-TT--------------------------------------------------------------------------------------------------------GG-A-G-C-C--A---GC-CGC--CG--AAG-G----T-GGG-AC-AGA------------------------TG--ATT-GGGG-TG-AAG-TCGTAACAA-GGTAG-CCGT-ATCGGAA-GGTG-CGGC-TGGATCACCTCCTTTCT--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>81187
---------------------------------------------------------------------------------------------------------------AGAGTTTGAT-CC-T-G-GCTC-AG-AG-TGAA-C-GC--TGG-C--G-GC-A-TG--C----C-T--AACACA-T-GC-A-AGT-CGA-A-CG----------G-TAA-CA-G------------------------------GC-C-CG----------------------------------------------------CAA-G----------------------------------------------------------------------------------GG---T------------------G-CT--G--AC--G--AG-T-GG-C-GG-A--C-------------GGG-TGAGG-A--AC-AC-A-T-C-GG---A-A--T-TT-G--C-C-CAG--AC-G------------------------------------------------------------------T-GG----GGG-AT-AA-CGT-------------------------A-G-G-----------------------GAA-A---CTT-ACG-CTAA-TA---CC-G--C-AT-A----------C--------------------G-------------------------------------TC-C-----------------------------------------------------------------------------------------------------------------------T-AC-G--------------------------------------------------------------------------------------------------------------------------------------G-G-A---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-C-G-GG-----G--GA-T--C--------------------------------------------------------------------------------------------------------------------GCA-A----------------------------------------------------------------------------------------------------------------------A----CC-TC--G---C-G--------------C----G-G---T-TG-G---AT---G-A-----G-CCG-ATG--T-CGG--A------TT--A--G-CT-A----G---TTGG-C-G-GG-G-T----AAG-AG-C-C-C-ACCA--A-GG-C-G--A-CG-A------------TCC-G-T------AG-CT-G-G-TCT-G-AG----A--GG-AT--G-AT-C-AG-CCAC-A-TTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAAA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TG---GG-G-A-ATA-TTGGA-C-AA-T-GG--GG-GC-A----A-C-CC-T-GA-TC-CA-GCAA-TGCC-G-CG-T---G-T-G--T--GA-A-G-
-A--A-G-G-CC-----TT-CG---------G-G-T-T-G-T--A---AA-G-CAC--------TT-TT-A-T--C-AGG----AA-C--G---AA-ACGC---GCTT-GG----T--G--AA-T---A----G-----CA-G-GTG-AAC--T-GA-CG-GT-A-C-CT-G-AG-G---------AA-----------TAAGC-ACC-GG-C-TAA---C--T-TCGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GA-AG-GGT-GCA-A-G-CG-TTAC-T-CGG-AA-TT-A--C-T--GGGC-GTA----AA-GGGT-GC--G-TA-G-G-T-G------------G--T-TG-T-T-T-AA----G-T-C-T---G-CTG-TG-A-AA-GC--CC-CGG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-T-G-GG-AA-T----G-G-C-A-G-T--------G--GA-T-A-C-T-G-GGC--A-G-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-A-G-A-A-T-G-----C-GG--TA-G-A------------G-GG-T-AG-T----GG--AATT-CCC-G-GT--GT-A-GCA-GTGAAA-TG-CGT-AGAG-A-TC-G-GGA--GG-A-AC-A-CC-AG--T--G--GC-GAA-G----C---G----G--C-T-ACCTG------G-AC-CA--------------------------------------------------------------GC-A-T-T--GA--CA-----CT-CA-AG--C
-A-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCC-T-AAA--C-GATG-TC--TA-CT---------A-GT--T--G-T-CG-G-GT-C--T---------------------------------------------------------------------------------------TA-AT--------------------------------------------------------------------------------------------------------------------------------------------------T-G-A-CT--T-G-G-T-AA-C------GC--A----GC-TAA--CG-C-G-T--G--AA-GT--A----G-ACC-GCC-T-G-GG-GAG-TA---CGG-----T-C--G-C-A-A-GAT-T--AAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-A-CCCG----C-A--C-A-A-GCG-GT-G--G--AT-GA-T--GT-GGA-TT-AATT-C-G-ATG-CAAC-G-CG-A-AA-A-A-CC-TT-A-CC-TACC--TT-G-AC-A-T-G--------------GCT-G-G-------------A-AT-C-C-C--GG--A-GA-G-A-T--T-T-G--G-G-A-G--T-GC----TC-------------------------------------G--AA-A------------------------------------------GA---GA----A---CC-AGT---A--CA---------------------------------------------------C-A-G-G-T-GCTG-CA-TGG-CT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TG--TC--A-TTAG--T-T-G-C-T---A--C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------G------------G----G---C-A--CT---------------C-T-A-A-T-GA-G--AC-T-G-CCG--G-T------------------------------------G-A---CAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAGT-C---CTC-A-T-G-G-C-C-CTT----AT-G--GG-T-A-GG-GC-TT-CAC-ACGTC-A--TA--CAATG---G-TACA-T-A--C-AGA--C-GC------------------------------------------------------------
--------------------------------------C-G-C-C-A--A-CCCG-C--G---------------------------------------A-GG-G-G-----------G--A-G-CT---A----------A--TCG-C------A-G-AAAGT-GT-A-T-C-G-TAG-TCC--------GGA-T-TGTAG-TC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-ACTGC-A-T-G-AA-G-TT-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GTC-GC-G-GT-G-AAT-ACGT-T-CCCGGGTCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-TG-GG-A--G---CGG-G-TT-TT-ACC--A-GAA------G--T-AGG-TA-G-C-T-T-AA-C-C-------------------------------------------------------------G-CA-A------------------------------------------------------------------------------------------------------GG-A--GG-G--C---GC-TTA--CC--ACG-G----T-AGG-AT-TCG------------------------TG--ACT-GGGGTGAAGTCGTAACAAGGTAAC----C-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>58677
--------------------------------------------------------------------------------------------------------------------------C--T-G-GCTC-AG-GA-CGAA-C-GC--TGG-C--G-GC-G-TG--C----C-T--AATACA-T-GC-A-AGT-CGA-G-CGG---------A-C---CA-A-------------------------------A-T-CG------------------------------------------------GAGCTTGCT----------------------------------------------------------------------------------CTGG--------------------T-TT--G--GT--C--AG-C-GG-C-GG-A--C-------------GGG-TGAGT-A--AC-AC-G-T-G-GG---CAA--C-CT-G--C-C-CGC--AA-G------------------------------------------------------------------A-CC----GGG-AT-AA-CTC-------------------------C-G-G-----------------------GAA-A---CCG-GAG-CTAA-TA---CC-G--G-AT-A----------A--------------------C-A--C-C-G--A--A-----------------GA---CC-G-----------------------------------------------------------------------------------------------------------------------C-AT-G--------------------------------------------------------------------------------------------------------------------------------------G---T--C-T---------------T--T-G-G-T-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-G-C-GG-----C-CTTTG-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GC-TG--T---C-A--------------C----T-T---G-CG-G---AT---G-G-----G-CCC-GCG--G-CGC--A------TT--A--G-CT-A----G---TTGG-T-G-AG-G-T----AAC-GG-C-T-C-ACCA--A-GG-C-G--A-CG-A------------TGC-G-T------AG-CC-G-G-CCT-G-AG----A--GG-GT--G-AC-C-GG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TA---GG-G-A-ATC-TTCCG-C-AA-T-GG--GC-GA-A----A-G-CC-T-GA-CG-GA-GCGA-CGCC-G-CG-T---G-A-G--C--GA-A-G-
-A--A-G-G-CC-----TT-CG---------G-G-T-C-G-T--A---AA-G-CTC--------TG-TT-G-T--G-AGG----GA-C--G---AAGGAGC---GCCG-TT----C--G--AA-G---A----G-----GG-C-GGC-GCG-GT-GA-CG-GT-A-C-CT-C-AC-G---------AG-----------AAAGC-CCC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GGG-GCG-A-G-CG-TTGT-C-CGG-AA-TT-A--T-T--GGGC-GTA----AA-GCGC-GC--G-CA-G-G-C-G------------G--T-CC-C-T-T-AA----G-T-C-T---G-ATG-TG-A-AA-GC--CC-ACG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-G-T-GG-AG-G----G-T-C-A-T-T--------G--GA-A-A-C-T-G-GGG--G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-G-----C-AG--GA-G-A------------G-GA-G-AG-C----GG--AATT-CCA-C-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TG-T-GGA--GG-A-AC-A-CC-AG--T--G--GC-GAA-G--G-C---G----G--C-T-CTCTG------G-CC-TG--------------------------------------------------------------CA-A-C-T--GA--CG-----CT-GA-GG--C
-G-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-AG--TG-CT---------A-AG--T--G-T-TA-G-AG-G----------------------------------------------------------------------------------------GGTC-ACAC--------------------------------------------------------------------------------------------------------------------------------------------------C-C-TT--T-A-G-T-GC-T------GC--A----GC-TAA--CG-C-G-A--T--AA-GC--A----C-TCC-GCC-T-G-GG-GAG-TA---CGG-----C-C--G-C-A-A-GGC-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AGGTC-TT-G-AC-A-T-C--------------CCC-T-G-------------A----C-A-A--CC--CAAG-A-G-A--T-T-G--G-G-C-G--T-TC----CC-----------------------------------CCTT-CG-G------------------------------------------GG---GG----A---CA-GGG---T--GA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-TT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-CG--CC--T-CTAG--T-T-G-C-C---AG-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TCAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---C-A--CT---------------C-T-A-G-A-GG-G--AC-T-G-CCG--G-C------------------------------------G-A---CAA----------------------------------G-T-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAAT-C---ATC-A-T-G-C-C-C-CTT----AT-G--AC-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-GCGG-T-A--C-AAA-GG-GC------------------------------------------------------------
--------------------------------------T-G-C-G-A--A-CCCG-C--G---------------------------------------A-GG-G-G-----------G--A-G-CG---A----------A--TCC-C------A-A-AAAGC-CG-C-T-C-T-CAG-TTC--------GGA-T-TGCAG-GC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-CCTGC-A-T-G-AA-G-CC-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AA-TACGT-T-CCCGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-CG-AG-A--G---CTT-G-CA-AC-ACC--C-GAA------G--T-CGG-TG-A-G-G-C-AA-C-C-C-----------------------------------------------------------G-CA-A---------------------------------------------------------------------------------------------------G--GG-A--GC-C--A---GC-CGC--CG--AAG-G----T-GGG-GC-AAG------------------------TG--ATT-GGGG-TG-AAG-TCGTAACAA-GGTAG-CCGT-ACCGGAA--GTG-CGGCTGGATCACCCTCCTT-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>14308
-------------------------------------------------------------------------------------------------------------------------------------------------------TGG-C--G-GC-G-TG--C----C-T--AACACA-T-GC-A-AGT-CGC-G-CGA---------G-A---AA-G----------------------------CTGC-T--C----------------------------------------------------TTT-G----------------------------------------------------------------------------------AG--CAGT----------------T--A--G--TA--A--AG-C-GG-C-GG-A--C-------------GGG-TGAGT-A--AC-GC-G-T-G-AG---TAA--T-CT-A--C-C-TTT--AA-G------------------------------------------------------------------T-CT----GAT-AT-AA-CTT-------------------------C-T-C-----------------------GAA-A---GGG-AAG-CTAA-TT---TC-G--G-AT-A---------------------------------T-TA-T-G--C--T----------------GCC---TG-G-----------------------------------------------------------------------------------------------------------------------A-TA-A--------------------------------------------------------------------------------------------------------------------------------------C-C-A--G-G---------------C--T-G-C-A-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CAAA--G-G-C-GG-----C-----T-----------------------------------------------------------------------------------------------------------------------TTT--------------------------------------------------------------------------------------------------------------------------T--GC-CT--C---C-G--------------C----T-T---T-TA-G---AT---G-T-----G-CTC-GCG--T-CCC--A------TT--A--G-CT-T----G---TTGG-T-G-AG-A-T----AAC-AG-C-T-C-ACCA--A-GG-C-T--G-CG-A------------TGG-G-T------AG-CC-G-A-CCT-G-AG----A--GG-GT--G-AT-C-GG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-T-GC-A-G-TG---GG-G-A-ATC-TTTCG-C-AA-T-GA--GC-GC-A----A-G-CT-T-GA-CG-AA-GCGA-CGCC-G-CG-T---G-A-G--T--GA-T-G-
-A--A-G-G-CC-----TT-CG---------G-G-T-C-G-T--A---AA-G-CTC--------TG-TC-C-T--C-AGG----GA-A--G---AACATCT---TAGT-AG----T--G--AA-T--------A-----AC-T-GCT-AGGCTT-GA-CG-GT-A-C-CT-G-AG-A---------AG-----------AAAGC-TCC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GGG-GCA-A-G-CG-TTGT-C-CGG-AA-TC-A--T-T--GGGC-GTA----AA-GGGT-GC--G-CA-G-G-C-G------------G--T-CT-G-G-C-AA----G-T-C-A---A-GTG-TG-A-AA-TG--TA-TCG-G--------------------------------------------------------------------CT-T-AA-------------------------------------------------------------------------CT-G-A-TA-CA------C-TGC-G-C-T--------T--GA-A-A-C-T-G-TCA--G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-G-G-----C-AA--GA-G-A------------A-GA-G-AG-C----GG--AATT-CCT-A-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAT-A-TT-A-GGA--AG-A-AC-A-CC-AG--T--G--GC-GAA-A--G-C---G----G--C-T-CTCTG------G-CT-TG--------------------------------------------------------------AC-C-C-T--GA--CG-----CT-GA-GG--C
-A-CGA--AA-G-C--------------T-AGGG-GAG-C-G-AACG--GG-ATTA-G-ATA-C-----CC-C-G-GTA-G-T----C-CT--G-G-CTG-T-AAA--C-GCTG-GA--TA-CT---------A-GG--T--G-T-TG-G--G-G--G--T------------------------------------------------------------------------------------TC-AA----------------------------------------------------------------------------------------------------------------------------------------------C---T-C-C-CT--C-A-G-T-GC-T------GC--A----GT-TAA--CG-C-G-T--T--AA-GT--A----T-CCC-GCC-T-G-GG-GAT-TA---CGA-----C-C--G-C-A-A-GGT-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-GCCT-G----C-A--C-A-A-GCG-GC-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-C-AG-A-A-CC-TT-A-CC-AGGGC-TT-G-AC-A-T-C------------CCGTGAC-T-------------A-TC-T-G-T--CA--A-CA-G-C-A--G-A-A--T-T-T-G---------GTCC------------------------------------T--TT-G------------------------------------------GA----T----C---AC-ACG-G-T--GA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-CT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-C-TA--TC--C-TTAG--T-T-G-C-C---AG-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TAAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---G-A--CT---------------C-T-A-G-G-GA-G--AC-T-G-CCA--G-T------------------------------------C-A---AAA----------------------------------A-C-T-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAGT-C---ATC-A-T-G-C-C-C-CTT----AT-G--CT-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-CCTG-T-A--C-AGA-GG-GC------------------------------------------------------------
--------------------------------------T-G-C-T-A--T-ACCG-C--A---------------------------------------A-GG-T-T-----------T--A-G-CC---A----------A--T-C-C------T-C-AAAAC-AG-G-T-C-C-CAG-TTC--------GGA-T-TGCTG-GC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-CCTGC-A-T-G-AA-G-CT-GGAGT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-A-------AT--GCC-GC-G-GT-G-AAT-CCGT-T-CCCAGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-CC-CG-A--G---TTG-G-AT-GC-ACC--A-GAA------G--T-CG---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>100011
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T--AACACA-T-GC-A-AGT-CGA-A-C-G-----------A---TA-A----------------------------CCTGG--AG----------------------------------------------------CT--G----------------------------------------------------------------------------------CTC-T-A-------------------GG-GA--AT--T--AG-T-GG-C-GA-A--C-------------GGA-GTGAG-T--AC-AC-G-T-G-AG---TAA--C-CT-G--C-C-CTT--GA-C------------------------------------------------------------------T-CT----GGG-AT-AA-CCT-------------------------C-C-G-----------------------GAA-A---CGG-AAG-CTAA------CC-G--G-AT-A---------------------------------T-GA-C-G--C--------------------AC---GGAG-----------------------------------------------------------------------------------------------------------------------G-CA-T-------------------------------------------------------------------------------------------------------------------------------------CT-C----CTG---------------T--G-C-G-T-G-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-A----------------------------------------------------------------------------------------------------------------------------------------ACT------------------------------------------------------------------------------------------------------------------------------------T---C-G--------------G----T-C---A-AG-G---AT---G-G-----A-CTC-GCG--G-CCT--A------TC--A--G-GT-A----G---TTGG-T-G-AG-G-T----AAC-GGCC-C---ACCA--A-GC-C----TACG-A------------CGG-G-T------A--CC-G-G-CCT-G-AG----A--GG-GT--G-AC-C-GG-CCAC-A-CTGGG--A-C-TG-A-TA-C-AC-G-G-CC-AGA-CTCC-TAC-G--G-G---G-GC-A-GC-ACGGTG---GG-G-A-ATA-TTGCA-C-AA-T-GG--GC-GA-A----A-G-CC-T-GA-TG-CA-GCA--CGCC-G-CG-T---A-G-G--G---------
-A--C-G-G-CC-----TT-CG---------G-G-T-T-G--------AA-C-CT---------TT-TT-A-T--T-AGG----GA-A--G---AAGC------------------------A-A---------------------------GT-GA-CG-GT-A-C-CT-G-TA------------A-----------AAAGC-ACC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCGG-----GG--TA-AT--AC---GT-AG-GGT-GCG-A-G-CG-TTGT-C-CGG-AA-TT-A--T-T--GGGC-GTA----AA-GAGC-TC--G-TA-G-G-C-G------------G--T-CT-G-T-C-GC----G-T-C-T---G-C-G-TGAG-AA-A---AC-CAG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-T-C-GG-GC-T----T-G-C-A-G-T--------G--GA-T-A-C-G-G-GCA--G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-A-G-A-G-T-------C-GG--TA-G-G------------G-GA-G-AA-T----GG--AATT-C---G-GT--GT---GCG-GTGGAA-TG-CGC-AGAT-A-TC-A-GGA--GG-A-CC---CC-GA--T--G--GC-GAA-T--G-C---A----G--T-T-CTCTG------G-C--CG-------------------------------------------------------------TA--A-C-T--GA--CA-----CT-GA-G---A
-T-CGA--AA-G-C--------------G-TGGG-A---C-G-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AA---C-GTTG-CG--CT--T---------A-GA--T--G-T-GG-G-GA-C--C-------------------------------------------------------------------------------------ATTC-CACG------------------------------------------------------------------------------------------------------------------------------------------------G-T-T--T--C-C-G-T-GT-C------G---A----GC-TAA--CG-C-A-T--T--AA-TG--C----G-CCC-GCC-T-G-GG-GAG-TA---CGG-----C----G-C-A-A-GGC-T--AAA--CTC-AAG------------A-TTG-ACGGG-G-G-CCCG----C-A--C-A-C-GCG-AG-----------A-T--GC-GGA-TT-AATT-G-A-TCG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AAGGC-TT-G-AC-A-T-A------------C-ACG-A-G-------------A-TA---C-G-GGCCAGAAA-T-G-G----T----C-A-A-C----------TC---------------------------------------TTTGG------------------------------------------AC----------AC-TC-AGT---G--AA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-TT--GTC-GTC-A-GC-TC---G-TG-TC-A--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-C-TG--TG--G-TTAG--T-T-G-C-C---AG-C-A--C--G-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TAA------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TG---G----T-G------------G----G---A-A--CT---------------C-A-T-A-G-GA-G--AC-T-G-CC---G-G------------------------------------G-T---CAA----------------------------------C-T---G--G-A-GG----AGG-T--GGGG-A-TGAC-GTC--AAAT-A---ATC-A-T-G--CC-C-CTT----AT-G--TC-T-T-GG-GC-TT-CAC-GTATG-C--TA--CAATG---C-CGGT-A-A--C-AAA-GG-GC------------------------------------------------------------
--------------------------------------T-G-C-A-A--T-ACCG-T--A---------------------------------------A-GG-T-G-----------G--A---CG---A----------A--TCC-C------A-A-AAA-C-CG-G-T-C-T-CAG-TTC--------GGA-T-TGAGG-TC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-ACCTC-A-T-G--A-G-TC-GGA-T-CG---TA--G-TA-AT-C-G-C----AGA-TC-A----A------AC--GCT--C-G-GT-G--AT-ACGT----CCCGGCCT-TGT-----CACACCG-CCC-GTC-----A---AG--TCA-TG-AA-A--G----TC-G-GA-AC-ACC--C-GA-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
"""
input_seqs1_fasta = """>FAKE1 here is some desc.73602 tag1;tag2, tag3:tag4
AGGCGGCTACCTGGACCAACACTGACACTGAGGCACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCCTAAACGATGCGAACTGGATGTTGGGTGCAATTTGGCACGCAGTATCGAAGCTAACGCGTTAAGTTCGCCGCCTGGGGA
GTACGGTCGCAAGACTTAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGTATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTGGTCTTGACATCCACGGAACTTTCCATAGATGGATTGGTGCCTTCGGGAACCGTGAGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCC
TTGTCCTTAGTTGCCAGCACGTAATGGTGGGAACTCTAAGGAGACCGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAGTCATCATGGCCCTTAGGGGACCAGGGCTACACACGTACTACAATGGTAGGGACAGAGGGCTGCAAACCCGCGAGGGCAAGCCAATCCCAGAAACCCTATCTCAGTCCGGATTGGAGTTTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCAGATCAGCATTGCTGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTTTGTTGCACCAGAA
GCAGGTAGCTTAACCTTCGGGAGGGCGCTCACGGTGTGGCCGATGACTGGGGTGAAGTCGTAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCTCCTTTTGAGCATGACGTCATCGTCCTGTCGGGCGTCCTCACAAATTACCTGCATTCAGAGATGCGTATCGGCACAGGCCGGTATGCGAAAGTCCCATCATGGGGCCTTAGCTCAGCTGGGAGAGCACCTGCTTTGCAAGCAGGGGGTCGTCGGTTCGATCCCGACAGGCTCCACCATTTGAGTGAAACGACTTTGGGTCTGTAGCTCAGGTGGTTAGAGCGCACCCCTGATAAGGGTGAGGTCGGTGGTTCGAGTCCTCCCAGACCCACCACTCTGAATGTAGTGCACACTTAAGAATTTATATGGCTCAGCGTTGAGGCTGAGACATGTTCTTTTATAACTTGTGACGTAGCGAGCGTTTGAGATATCTATCTAAACGTGTCGTTGAGGCTAAGGCGGGGACTTCGAGTCCCTAAATAATTGAGTCGTATGTTCGCGTTGGTGGCTTTGTACCCCACACAACACGGCGTATGGCCCCGAGGCAACTTGGGGT
TATATGGTCAAGCGAATAAGCGCACACGGTGGATGCCTAGGCGGTCAGAGGCGATGAAGGACGTGGTAGCCTGCGAAAAGTGTCGGGGAGCTGGCAACAAGCTTTGATCCGGCAATATCCGAATGGGGAAACCCGG
>AKIW1129_fasta.screen.Contig1 description field
GAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGAATGACAGAGGAGCTTGCTCCTCTCGATTTAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTGCCTTATAGCTTGGGATAACTCCGGGAAACCGGGGCTAATACCGAATAATACTTTTGGACACATGTTCGAAAGTTGAAAGATGGTTCTGCTATCACTATAAGATGGACCCGCGCTGCATTAGCTAGTTGGTGAGGTAACGGCTCACCAAGGCCACGATGCATAGCCGACCTGAGAGGGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGTGAAGAAGGATTTCGGTTCGTAAAACTCTGTTGTAAGGGAAGAACAAGTACAGTAGTAACTGGCTGTACCTTGACGGTACCTTATTAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGTGGTCCTTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGGACTTGAGTGCAGAAGAGGATAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTGTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTGGGGGGTTTCCGCCCCTCAG
TGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCATTGACCACTGTAGAGATACAGTTTTCCCTTCGGGGACAACGGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTTCTTAGTTGCCATCATTTAGTTGGGCACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAACGGTTGCCAACCCGCGAGGGGGAGCTAATCCGATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTTGGAGCCAGCCGCCGAAGGTGGGATAGATGATTGGGGTGAAGTCGTAACAAGGT
>AKIW521_fasta.screen.Contig1
gagtttgatcatggctcagattgaacgctggcggcatgccttacacatgcaagtcgaacggcagcgcggggcaacctggcggcgagtggcgaacgggtgagtaatacatcggaacgtacccagaagtgggggataacgtagcgaaagttacgctaataccgcatacgttctacggaagaaagtgggggatcttcggacctcatgcttttggagcggccgatgtctgattagctagttggtgaggtaaaggctcaccaaggcgacgatcagtagctggtctgagaggacgaccagccacactgggactgagacacggcccagactcctacgggaggcagcagtggggaattttggacaatgggcgcaagcctgctccagcaatgccgcgtgagtgaagaagg
ccttcgggttgtaaagctcttttgtcagggaagaaacggctgaggttaataccttcggctaatgacggtacctgaagaataagcgccggctaactacgtgccagcagccgcggtaatacgtagggtgcaagcgttaatcggaattactgggcgtaaagcgtgcgcaggcggttttgtaagtctgacgtgaaatccccgggctcaacctgggaattgcgttggagactgcaaggctagagtctggcagaggggggtagaattccacgtgtagcagtgaaatgcgtagagatgtggaggaacaccgatgggcgaaggcagccccctgggtcaagactgacgctcatgcacgaaagcgtggggagcaaacaggattagataccctggtagtccacgcc
ctaaacgatgtctactagttgtcgggtcttaattgacttggtaacgcagctaacgcgtgaagtag
accgcctggggagtacggtcacaagattaaaactcaaaggaattgacggggacccgcacaagcggtggatgatgtggattaattcgatgcaacgcgaaaaaccttacctacccttgacatgtcaggaatcctcgagagattgaggagtgcccgaaagggaacctgaacacaggtgctgcatggctgtcgtcagctcgtgtcgtgagatgttgggttaagtcccgcaacgagcgcaacccttgtcattagttgctacgaaagggcactctaatgagactgccggtgacaa
accggaggaaggtgggga
tgacgtcaagtcctcatggcccttatgggtagggcttcacacgtcatacaatggtacatacagagggccgccaacccgcgagggggagctaatcccagaaagtgtatcgtagtccggatcgcagtctgcaactcgactgcgtgaagttggaatcgctagtaatcgcggatcagcatgccgcggtgaatacgttcccgggtcttgtacacaccgcccgtcacaccatgggagcgggttttaccagaagtaggtagcttaaccgcaaggggggcgcttaccacggtaggattcgtgactggggtgaagtcgtaacaaggtaa
>modified_AKIW1129_both_ends_extended
CCGGAATTCCTTTTAAGAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGAATGACAGAGGAGCTTGCTCCTCTCGATTTAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTGCCTTATAGCTTGGGATAACTCCGGGAAACCGGGGCTAATACCGAATAATACTTTTGGACACATGTTCGAAAGTTGAAAGATGGTTCTGCTATCACTATAAGATGGACCCGCGCTGCATTAGCTAGTTGGTGAGGTAACGGCTCACCAAGGCCACGATGCATAGCCGACCTGAGAGGGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGTGAAGAAGGATTTCGGTTCGTAAAACTCTGTTGTAAGGGAAGAACAAGTACAGTAGTAACTGGCTGTACCTTGACGGTACCTTATTAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGTGGTCCTTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGGACTTGAGTGCAGAAGAGGATAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTGTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTGGGGGGTTTCCGCCCCTCAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCATTGACCACTGTAGAGATACAGTTTTCCCTTCGGGGACAACGGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTTCTTAGTTGCCATCATTTAGTTGGGCACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAACGGTTGCCAACCCGCGAGGGGGAGCTAATCCGATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTTGGAGCCAGCCGCCGAAGGTGGGATAGATGATTGGGGTGAAGTCGTAACAAGGTCCGGAATTCCTTTTAA
>modified_AKIW1129_5_prime_end_extended
CCGGAATTCCTTTTAAGAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGAATGACAGAGGAGCTTGCTCCTCTCGATTTAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTGCCTTATAGCTTGGGATAACTCCGGGAAACCGGGGCTAATACCGAATAATACTTTTGGACACATGTTCGAAAGTTGAAAGATGGTTCTGCTATCACTATAAGATGGACCCGCGCTGCATTAGCTAGTTGGTGAGGTAACGGCTCACCAAGGCCACGATGCATAGCCGACCTGAGAGGGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGTGAAGAAGGATTTCGGTTCGTAAAACTCTGTTGTAAGGGAAGAACAAGTACAGTAGTAACTGGCTGTACCTTGACGGTACCTTATTAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGTGGTCCTTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGGACTTGAGTGCAGAAGAGGATAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTGTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTGGGGGGTTTCCGCCCCTCAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCATTGACCACTGTAGAGATACAGTTTTCCCTTCGGGGACAACGGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTTCTTAGTTGCCATCATTTAGTTGGGCACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAACGGTTGCCAACCCGCGAGGGGGAGCTAATCCGATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTTGGAGCCAGCCGCCGAAGGTGGGATAGATGATTGGGGTGAAGTCGTAACAAGGT
>modified_AKIW1129_3_prime_end_extended
GAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGAATGACAGAGGAGCTTGCTCCTCTCGATTTAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTGCCTTATAGCTTGGGATAACTCCGGGAAACCGGGGCTAATACCGAATAATACTTTTGGACACATGTTCGAAAGTTGAAAGATGGTTCTGCTATCACTATAAGATGGACCCGCGCTGCATTAGCTAGTTGGTGAGGTAACGGCTCACCAAGGCCACGATGCATAGCCGACCTGAGAGGGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGTGAAGAAGGATTTCGGTTCGTAAAACTCTGTTGTAAGGGAAGAACAAGTACAGTAGTAACTGGCTGTACCTTGACGGTACCTTATTAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGTGGTCCTTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGGACTTGAGTGCAGAAGAGGATAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTGTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTGGGGGGTTTCCGCCCCTCAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCATTGACCACTGTAGAGATACAGTTTTCCCTTCGGGGACAACGGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTTCTTAGTTGCCATCATTTAGTTGGGCACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAACGGTTGCCAACCCGCGAGGGGGAGCTAATCCGATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTTGGAGCCAGCCGCCGAAGGTGGGATAGATGATTGGGGTGAAGTCGTAACAAGGTGATTACACCGGAATTCCTTTTAA"""
input_seqs1_aligned_fasta = """>AKIW1129_fasta.screen.Contig1 description field 1..1507
-------------------------------------------------------------------------------------------------------------GAGTTT-GA--T-CA-T-G-GCTC-AG-GA-CGAA-C-GC--TGG-C--G-GC-G-TG--C----C-T--AATACA-T-GC-A-AGT-CGA-G-CGA---------A-T---GA-C---------------------------AGAGG---AG----------------------------------------------------CTT-G----------------------------------------------------------------------------------CTCCTCT-------------------CG--A--TT--T--AG-C-GGCG-G--A--C-------------GGG-TGAGT-A--AC-AC-G-T-G-GG---TAA--C-CTGC--C-T--TA--TA-G------------------------------------------------------------------C-TT----GGG-AT-AA-CTC-------------------------C-G-G-----------------------GAA-A---CCG-GGG-CTAATAC---CG-A----AT-A---------------------------------A-TA-C-T--T--T----------------TGG---AC-------------------------------------------------------------------------------------------------------------------------A-CA-T--------------------------------------------------------------------------------------------------------------------------------------G---T--TCG---------------A--A--AG-T-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-A-T-GG----------------------------------------------------------------------------------------------------------------------------------TTCT--------------------------------------------------------------------------------------------------------------------------G--C--TA--T---C-A--------------C----T-A---T-AA-G---AT---G-G-----A-CCC-GCG--C-TGC--A------TT--A--G-CT-A----G---TTGG-T-G-AG-G-T----AAC-GG-C-T-C-ACCA--A-GG-C-C--A-CG-A------------TGC-A-T------AG-CC-G-A-CCT-G-AG----A--GG-GT--G-AT-C-GG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TA---GG-G-A-ATC-TTCCA-C-AA-T-GG--AC-GA-A----A-G-TC-T-GA-TG-GA-GCAA-CGCC-G-CG-T---G-A-G--T--GA-A-G-
-A--A-G-G-AT-----TT-CG---------G-T-T-C-G-T--A---AA-A-CTC--------TG-TT-G-T--A-AGG----GA-A--G---AACAAGT---ACAG-TA----G--T--AA-C---T----G-----G--C-TGT-ACC-TT-GA-CG-GT-A-C-CT-T-AT-T---------AG-----------AAAGC-CAC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GTG-GCA-A-G-CG-TTGT-C-CGG-AA-TT-A--T-T--GGGC-GTA----AA-GCGC-GC--G-CA-G-G-T-G------------G--T-CC-T-T-T-AA----G-T-C-T---G-ATG-TG-A-AA-GC--CC-ACG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-G-T-GG-AG------G-GTC-A-T-T--------G--GA-A-A-C-T-G-GGG--G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-G-----C-AG--AA-G-A------------G-GA-T-AG-T----GG--AATT-CCA-A-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TT-T-GGA--GG-A-AC-A-CC-AG--T--G--GC-GAA-G--G-C---G----A--C-T-GTCTG------G-TC-TG--------------------------------------------------------------TA-A-C-T--GA--CA-----CT-GA-GG--C
-G-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-AG--TG-CT---------A-AG--T--G-T-TG-G-GG-G--G--T------------------------------------------------------------------------------------TT-CC----------------------------------------------------------------------------------------------------------------------------------------------G---C-C-C-CT--C-A-G-T-GC-T------GC--A----GC-TAA--CG-C-A-T--T--AA-GC--A----C-TCC-GCC-T-G-GG-GAG-TA---CGG-----T-C--G-C-A-A-GAC-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AGGTC-TT-G-AC-A-TCC--------------CAT-T-G-------------A-CC-A-C-T--GT--A-GA-G-A-T--A-C-A--G-T-T-T--T-C-----CC-------------------------------------T--TC-G------------------------------------------GG----G----A--CAA-CGG---T--GA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-TT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TG--TT--C-TTAG--T-T-G-C-C---AT-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---C-A--CT---------------C-T-A-A-G-GA-G--AC-T-G-CCG--G-T------------------------------------G-A---CAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAAT-C---ATC-A-T-G-C-C-C-CTT----AT-G--AC-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-ACGG-T-A--C-AAA-CG-GT------------------------------------------------------------
--------------------------------------T-G-C-C-A--A-CCCG-C--G---------------------------------------A-GG-G-G-----------G--A-G-CT---A----------A--TCC-G------A-T-AAAAC-CG-T-T-C-T-CAG-TTC--------GGA-T-TGTAG-GC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-CCTAC-A-T-G-AA-G-CC-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AAT-ACGT-T-CCCGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-CG-AG-A--G---TTT-G-TA-AC-ACC--C-GAA------G--T-CGG-TG-A-G-G-T-AA-C-C-T-----------------------------------------------------------T-TT--------------------------------------------------------------------------------------------------------GG-A-G-C-C--A---GC-CGC--CG--AAG-G----T-GGG-AT-AGA------------------------TG--ATT-GGGG-TG-AAG-TCGTAACAA-GGT---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>AKIW521_fasta.screen.Contig1 1..1488
----------------------------------------------------------------------------------------------------------------GAGTTTGAT-CA-T-G-GCTC-AG-AT-TGAA-C-GC--TGG-C--G-GC-A-TG--C----C-T--TACACA-T-GC-A-AGT-CGA-A-CG----------G-CAG-C---------------------------------GC-G-GG----------------------------------------------------GCA-A----------------------------------------------------------------------------------CC---T------------------G-GC--G--GC--G--AG-T-GG-C-GA-A--C-------------GGG-TGAGT-A--AT-AC-A-T-C-GG---A-A--C-GT-A--C-C-CAG--AA-G------------------------------------------------------------------T-GG----GGG-AT-AA-CGT-------------------------A-G-C-----------------------GAA-A---GTT-ACG-CTAA-TA---CC-G--C-AT-A----------C--------------------G-------------------------------------TT-C-----------------------------------------------------------------------------------------------------------------------T-AC-G--------------------------------------------------------------------------------------------------------------------------------------G-A-A---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-T-G-GG-----G--GA-T--C-------------------------------------------------------------------------------------------------------------------TTCG-G----------------------------------------------------------------------------------------------------------------------A----CC-TC--A---T-G--------------C----T-T---T-TG-G---AG---C-G-----G-CCG-ATG--T-CTG--A------TT--A--G-CT-A----G---TTGG-T-G-AG-G-T----AAA-GG-C-T-C-ACCA--A-GG-C-G--A-CG-A------------TCA-G-T------AG-CT-G-G-TCT-G-AG----A--GG-AC--G-AC-C-AG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TG---GG-G-A-ATT-TTGGA-C-AA-T-GG--GC-GC-A----A-G-CC-T-GC-TC-CA-GCAA-TGCC-G-CG-T---G-A-G--T--GA-A-G-
-A--A-G-G-CC-----TT-CG---------G-G-T-T-G-T--A---AA-G-CTC--------TT-TT-G-T--C-AGG----GA-A--G---AA-ACGG---CTGA-GG----T--T--AA-T---A----C-----CT-T-CGGCTAA--T-GA-CG-GT-A-C-CT-G-AA-G---------AA-----------TAAGC-GCC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GGT-GCA-A-G-CG-TTAA-T-CGG-AA-TT-A--C-T--GGGC-GTA----AA-GCGT-GC--G-CA-G-G-C-G------------G--T-TT-T-G-T-AA----G-T-C-T---G-ACG-TG-A-AA-TC--CC-CGG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-T-G-GG-AA-T----T-G-C-G-T-T--------G--GA-G-A-C-T-G-CAA--G-G-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-A-G-A-G-T-C-----T-GG--CA-G-A------------G-GG-G-GG-T----AG--AATT-CCA-C-GT--GT-A-GCA-GTGAAA-TG-CGT-AGAG-A-TG-T-GGA--GG-A-AC-A-CC-GA--T--G-GGC-GAA-G---GC---A----G--C-C-CCCTG------G-GT-CA--------------------------------------------------------------AG-A-C-T--GA--CG-----CT-CA-TG--C
-A-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCC-T-AAA--C-GATG-TC--TA-CT---------A-GT--T--G-T-CG-G-GT-C--T---------------------------------------------------------------------------------------TA-AT--------------------------------------------------------------------------------------------------------------------------------------------------T-G-A-CT--T-G-G-T-AA-C------GC--A----GC-TAA--CG-C-G-T--G--AA-GT--A----G-ACC-GCC-T-G-GG-GAG-TA---CGG-----T-C--A-C-A-A-GAT-T--AAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-A-CCCG----C-A--C-A-A-GCG-GT-G--G--AT-GA-T--GT-GGA-TT-AATT-C-G-ATG-CAAC-G-CG-A-AA-A-A-CC-TT-A-CC-TACC-CTT-G-AC-A-T-G--------------TCA-G-G-------------A-AT-C-C-T--CG--A-GA-G-A-T--T-G-A--G-G-A-G--T-GC----CC-------------------------------------G--AA-A------------------------------------------GG---GA----A---CC-TGA---A--CA---------------------------------------------------C-A-G-G-T-GCTG-CA-TGG-CT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TG--TC--A-TTAG--T-T-G-C-T---A--C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------G------------G----G---C-A--CT---------------C-T-A-A-T-GA-G--AC-T-G-CCG--G-T------------------------------------G-A---CAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAGT-C---CTC-A-T-G-G-C-C-CTT----AT-G--GG-T-A-GG-GC-TT-CAC-ACGTC-A--TA--CAATG---G-TACA-T-A--C-AGA--GGGC------------------------------------------------------------
--------------------------------------C-G-C-C-A--A-CCCG-C--G---------------------------------------A-GG-G-G-----------G--A-G-CT---A----------A--TCC-C------A-G-AAAGT-GT-A-T-C-G-TAG-TCC--------GGA-T-CGCAG-TC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-ACTGC-G-T-G-AA-G-TT-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AAT-ACGT-T-CCCGGGTCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-TG-GG-A--G---CGG-G-TT-TT-ACC--A-GAA------G--T-AGG-TA-G-C-T-T-AA-C-C-------------------------------------------------------------G-CA-A------------------------------------------------------------------------------------------------------GG-G--GG-G--C---GC-TTA--CC--ACG-G----T-AGG-AT-TCG------------------------TG--ACT-GGGGTGAAGTCGTAACAAGGTAA-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>modified_AKIW1129_both_ends_extended 16..1523
------------------------------------------------------------------------------------------------------------AGAGTTT-GA--T-CA-T-G-GCTC-AG-GA-CGAA-C-GC--TGG-C--G-GC-G-TG--C----C-T--AATACA-T-GC-A-AGT-CGA-G-CGA---------A-T---GA-C---------------------------AGAGG---AG----------------------------------------------------CTT-G----------------------------------------------------------------------------------CTCCTCT-------------------CG--A--TT--T--AG-C-GGCG-G--A--C-------------GGG-TGAGT-A--AC-AC-G-T-G-GG---TAA--C-CTGC--C-T--TA--TA-G------------------------------------------------------------------C-TT----GGG-AT-AA-CTC-------------------------C-G-G-----------------------GAA-A---CCG-GGG-CTAATAC---CG-A----AT-A---------------------------------A-TA-C-T--T--T----------------TGG---AC-------------------------------------------------------------------------------------------------------------------------A-CA-T--------------------------------------------------------------------------------------------------------------------------------------G---T--TCG---------------A--A--AG-T-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-A-T-GG----------------------------------------------------------------------------------------------------------------------------------TTCT--------------------------------------------------------------------------------------------------------------------------G--C--TA--T---C-A--------------C----T-A---T-AA-G---AT---G-G-----A-CCC-GCG--C-TGC--A------TT--A--G-CT-A----G---TTGG-T-G-AG-G-T----AAC-GG-C-T-C-ACCA--A-GG-C-C--A-CG-A------------TGC-A-T------AG-CC-G-A-CCT-G-AG----A--GG-GT--G-AT-C-GG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TA---GG-G-A-ATC-TTCCA-C-AA-T-GG--AC-GA-A----A-G-TC-T-GA-TG-GA-GCAA-CGCC-G-CG-T---G-A-G--T--GA-A-G-
-A--A-G-G-AT-----TT-CG---------G-T-T-C-G-T--A---AA-A-CTC--------TG-TT-G-T--A-AGG----GA-A--G---AACAAGT---ACAG-TA----G--T--AA-C---T----G-----G--C-TGT-ACC-TT-GA-CG-GT-A-C-CT-T-AT-T---------AG-----------AAAGC-CAC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GTG-GCA-A-G-CG-TTGT-C-CGG-AA-TT-A--T-T--GGGC-GTA----AA-GCGC-GC--G-CA-G-G-T-G------------G--T-CC-T-T-T-AA----G-T-C-T---G-ATG-TG-A-AA-GC--CC-ACG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-G-T-GG-AG------G-GTC-A-T-T--------G--GA-A-A-C-T-G-GGG--G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-G-----C-AG--AA-G-A------------G-GA-T-AG-T----GG--AATT-CCA-A-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TT-T-GGA--GG-A-AC-A-CC-AG--T--G--GC-GAA-G--G-C---G----A--C-T-GTCTG------G-TC-TG--------------------------------------------------------------TA-A-C-T--GA--CA-----CT-GA-GG--C
-G-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-AG--TG-CT---------A-AG--T--G-T-TG-G-GG-G--G--T------------------------------------------------------------------------------------TT-CC----------------------------------------------------------------------------------------------------------------------------------------------G---C-C-C-CT--C-A-G-T-GC-T------GC--A----GC-TAA--CG-C-A-T--T--AA-GC--A----C-TCC-GCC-T-G-GG-GAG-TA---CGG-----T-C--G-C-A-A-GAC-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AGGTC-TT-G-AC-A-TCC--------------CAT-T-G-------------A-CC-A-C-T--GT--A-GA-G-A-T--A-C-A--G-T-T-T--T-C-----CC-------------------------------------T--TC-G------------------------------------------GG----G----A--CAA-CGG---T--GA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-TT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TG--TT--C-TTAG--T-T-G-C-C---AT-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---C-A--CT---------------C-T-A-A-G-GA-G--AC-T-G-CCG--G-T------------------------------------G-A---CAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAAT-C---ATC-A-T-G-C-C-C-CTT----AT-G--AC-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-ACGG-T-A--C-AAA-CG-GT------------------------------------------------------------
--------------------------------------T-G-C-C-A--A-CCCG-C--G---------------------------------------A-GG-G-G-----------G--A-G-CT---A----------A--TCC-G------A-T-AAAAC-CG-T-T-C-T-CAG-TTC--------GGA-T-TGTAG-GC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-CCTAC-A-T-G-AA-G-CC-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AAT-ACGT-T-CCCGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-CG-AG-A--G---TTT-G-TA-AC-ACC--C-GAA------G--T-CGG-TG-A-G-G-T-AA-C-C-T-----------------------------------------------------------T-TT--------------------------------------------------------------------------------------------------------GG-A-G-C-C--A---GC-CGC--CG--AAG-G----T-GGG-AT-AGA------------------------TG--ATT-GGGG-TG-AAG-TCGTAACAA-GGT---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>modified_AKIW1129_5_prime_end_extended 16..1523
------------------------------------------------------------------------------------------------------------AGAGTTT-GA--T-CA-T-G-GCTC-AG-GA-CGAA-C-GC--TGG-C--G-GC-G-TG--C----C-T--AATACA-T-GC-A-AGT-CGA-G-CGA---------A-T---GA-C---------------------------AGAGG---AG----------------------------------------------------CTT-G----------------------------------------------------------------------------------CTCCTCT-------------------CG--A--TT--T--AG-C-GGCG-G--A--C-------------GGG-TGAGT-A--AC-AC-G-T-G-GG---TAA--C-CTGC--C-T--TA--TA-G------------------------------------------------------------------C-TT----GGG-AT-AA-CTC-------------------------C-G-G-----------------------GAA-A---CCG-GGG-CTAATAC---CG-A----AT-A---------------------------------A-TA-C-T--T--T----------------TGG---AC-------------------------------------------------------------------------------------------------------------------------A-CA-T--------------------------------------------------------------------------------------------------------------------------------------G---T--TCG---------------A--A--AG-T-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-A-T-GG----------------------------------------------------------------------------------------------------------------------------------TTCT--------------------------------------------------------------------------------------------------------------------------G--C--TA--T---C-A--------------C----T-A---T-AA-G---AT---G-G-----A-CCC-GCG--C-TGC--A------TT--A--G-CT-A----G---TTGG-T-G-AG-G-T----AAC-GG-C-T-C-ACCA--A-GG-C-C--A-CG-A------------TGC-A-T------AG-CC-G-A-CCT-G-AG----A--GG-GT--G-AT-C-GG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TA---GG-G-A-ATC-TTCCA-C-AA-T-GG--AC-GA-A----A-G-TC-T-GA-TG-GA-GCAA-CGCC-G-CG-T---G-A-G--T--GA-A-G-
-A--A-G-G-AT-----TT-CG---------G-T-T-C-G-T--A---AA-A-CTC--------TG-TT-G-T--A-AGG----GA-A--G---AACAAGT---ACAG-TA----G--T--AA-C---T----G-----G--C-TGT-ACC-TT-GA-CG-GT-A-C-CT-T-AT-T---------AG-----------AAAGC-CAC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GTG-GCA-A-G-CG-TTGT-C-CGG-AA-TT-A--T-T--GGGC-GTA----AA-GCGC-GC--G-CA-G-G-T-G------------G--T-CC-T-T-T-AA----G-T-C-T---G-ATG-TG-A-AA-GC--CC-ACG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-G-T-GG-AG------G-GTC-A-T-T--------G--GA-A-A-C-T-G-GGG--G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-G-----C-AG--AA-G-A------------G-GA-T-AG-T----GG--AATT-CCA-A-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TT-T-GGA--GG-A-AC-A-CC-AG--T--G--GC-GAA-G--G-C---G----A--C-T-GTCTG------G-TC-TG--------------------------------------------------------------TA-A-C-T--GA--CA-----CT-GA-GG--C
-G-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-AG--TG-CT---------A-AG--T--G-T-TG-G-GG-G--G--T------------------------------------------------------------------------------------TT-CC----------------------------------------------------------------------------------------------------------------------------------------------G---C-C-C-CT--C-A-G-T-GC-T------GC--A----GC-TAA--CG-C-A-T--T--AA-GC--A----C-TCC-GCC-T-G-GG-GAG-TA---CGG-----T-C--G-C-A-A-GAC-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AGGTC-TT-G-AC-A-TCC--------------CAT-T-G-------------A-CC-A-C-T--GT--A-GA-G-A-T--A-C-A--G-T-T-T--T-C-----CC-------------------------------------T--TC-G------------------------------------------GG----G----A--CAA-CGG---T--GA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-TT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TG--TT--C-TTAG--T-T-G-C-C---AT-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---C-A--CT---------------C-T-A-A-G-GA-G--AC-T-G-CCG--G-T------------------------------------G-A---CAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAAT-C---ATC-A-T-G-C-C-C-CTT----AT-G--AC-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-ACGG-T-A--C-AAA-CG-GT------------------------------------------------------------
--------------------------------------T-G-C-C-A--A-CCCG-C--G---------------------------------------A-GG-G-G-----------G--A-G-CT---A----------A--TCC-G------A-T-AAAAC-CG-T-T-C-T-CAG-TTC--------GGA-T-TGTAG-GC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-CCTAC-A-T-G-AA-G-CC-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AAT-ACGT-T-CCCGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-CG-AG-A--G---TTT-G-TA-AC-ACC--C-GAA------G--T-CGG-TG-A-G-G-T-AA-C-C-T-----------------------------------------------------------T-TT--------------------------------------------------------------------------------------------------------GG-A-G-C-C--A---GC-CGC--CG--AAG-G----T-GGG-AT-AGA------------------------TG--ATT-GGGG-TG-AAG-TCGTAACAA-GGT---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>modified_AKIW1129_3_prime_end_extended 12..1520
-----------------------------------------------------------------------------------------------------------------------------T-G-GCTC-AG-GA-CGAA-C-GC--TGG-C--G-GC-G-TG--C----C-T--AATACA-T-GC-A-AGT-CGA-G-CGA---------A-T---GA-C-------------------------------A-G-AG------------------------------------------------GAGCTTGCT----------------------------------------------------------------------------------CCTC--------------------T-CG--A--TT--T--AG-C-GG-C-GG-A--C-------------GGG-TGAGT-A--AC-AC-G-T-G-GG---TAA--C-CT-G--C-C-TTA--TA-G------------------------------------------------------------------C-TT----GGG-AT-AA-CTC-------------------------C-G-G-----------------------GAA-A---CCG-GGG-CTAA-TA---CC-G--A-AT-A----------A--------------------T-A--C-T-T--T--T-----------------GG---AC-A-----------------------------------------------------------------------------------------------------------------------C-AT-G--------------------------------------------------------------------------------------------------------------------------------------T---T--C-G---------------A--A-A-G-T-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAA--G-A-T-GG--------TTCT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GC-TA--T---C-A--------------C----T-A---T-AA-G---AT---G-G-----A-CCC-GCG--C-TGC--A------TT--A--G-CT-A----G---TTGG-T-G-AG-G-T----AAC-GG-C-T-C-ACCA--A-GG-C-C--A-CG-A------------TGC-A-T------AG-CC-G-A-CCT-G-AG----A--GG-GT--G-AT-C-GG-CCAC-A-CTGGG--A-C-TG-A-GA-C-AC-G-G-CCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TA---GG-G-A-ATC-TTCCA-C-AA-T-GG--AC-GA-A----A-G-TC-T-GA-TG-GA-GCAA-CGCC-G-CG-T---G-A-G--T--GA-A-G-
-A--A-G-G-AT-----TT-CG---------G-T-T-C-G-T--A---AA-A-CTC--------TG-TT-G-T--A-AGG----GA-A--G---AACAAGT---ACAG-TA----G--T--AA-C---T----G-----G--C-TGT-ACC-TT-GA-CG-GT-A-C-CT-T-AT-T---------AG-----------AAAGC-CAC-GG-C-TAA---C--T-ACGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GT-AG-GTG-GCA-A-G-CG-TTGT-C-CGG-AA-TT-A--T-T--GGGC-GTA----AA-GCGC-GC--G-CA-G-G-T-G------------G--T-CC-T-T-T-AA----G-T-C-T---G-ATG-TG-A-AA-GC--CC-ACG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-G-T-GG-AG-G----G-T-C-A-T-T--------G--GA-A-A-C-T-G-GGG--G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-G-----C-AG--AA-G-A------------G-GA-T-AG-T----GG--AATT-CCA-A-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TT-T-GGA--GG-A-AC-A-CC-AG--T--G--GC-GAA-G--G-C---G----A--C-T-GTCTG------G-TC-TG--------------------------------------------------------------TA-A-C-T--GA--CA-----CT-GA-GG--C
-G-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-AG--TG-CT---------A-AG--T--G-T-TG-G-GG-G----------------------------------------------------------------------------------------GTTT-CCGC--------------------------------------------------------------------------------------------------------------------------------------------------C-C-CT--C-A-G-T-GC-T------GC--A----GC-TAA--CG-C-A-T--T--AA-GC--A----C-TCC-GCC-T-G-GG-GAG-TA---CGG-----T-C--G-C-A-A-GAC-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-AAG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AGGTC-TT-G-AC-A-T-C--------------CCATT-G-------------A----C-C-A--CT--GTAG-A-G-A--T------A-C-A-G--T-TT----TC-----------------------------------CCTT-CG-G------------------------------------------GG---AC----A----A-CGG---T--GA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-TT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TG--TT--C-TTAG--T-T-G-C-C---AT-C-A--T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T----T-G------------G----G---C-A--CT---------------C-T-A-A-G-GA-G--AC-T-G-CCG--G-T------------------------------------G-A---CAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAAT-C---ATC-A-T-G-C-C-C-CTT----AT-G--AC-C-T-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-ACGG-T-A--C-AAA-CG-GT------------------------------------------------------------
--------------------------------------T-G-C-C-A--A-CCCG-C--G---------------------------------------A-GG-G-G-----------G--A-G-CT---A----------A--TCC-G------A-T-AAAAC-CG-T-T-C-T-CAG-TTC--------GGA-T-TGTAG-GC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-CCTAC-A-T-G-AA-G-CC-GGAAT-CG-C-TA--G-TA-AT-C-G-C----GGA-TC-A-G-C-------AT--GCC-GC-G-GT-G-AA-TACGT-T-CCCGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-CG-AG-A--G---TTT-G-TA-AC-ACC--C-GAA------G--T-CGG-TG-A-G-G-T-AA-C-C---------------------------------------------------------------TT-T---------------------------------------------------------------------------------------------------T--GG-A--GC-C--A---GC-CGC--CG--AAG-G----T-GGG-AT-AGA------------------------TG--ATT-GGGG-TG-AAG-TCGTAACAA-GGTGA-TTAC-ACCGGAA------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
"""
# FASTA-formatted fixture holding a single record: a header line with the
# label "FAKE1" followed by free-text description, then one unwrapped
# sequence line, then a trailing newline.
# NOTE(review): the "_fail" suffix suggests this sequence is expected to be
# rejected by the aligner -- confirm against the tests that consume it.
input_seqs1_fail_fasta = """>FAKE1 here is some desc.73602 tag1;tag2, tag3:tag4
AGGCGGCTACCTGGACCAACACTGACACTGAGGCACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCCTAAACGATGCGAACTGGATGTTGGGTGCAATTTGGCACGCAGTATCGAAGCTAACGCGTTAAGTTCGCCGCCTGGGGAGTACGGTCGCAAGACTTAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGTATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTGGTCTTGACATCCACGGAACTTTCCATAGATGGATTGGTGCCTTCGGGAACCGTGAGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTCCTTAGTTGCCAGCACGTAATGGTGGGAACTCTAAGGAGACCGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAGTCATCATGGCCCTTAGGGGACCAGGGCTACACACGTACTACAATGGTAGGGACAGAGGGCTGCAAACCCGCGAGGGCAAGCCAATCCCAGAAACCCTATCTCAGTCCGGATTGGAGTTTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCAGATCAGCATTGCTGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTTTGTTGCACCAGAAGCAGGTAGCTTAACCTTCGGGAGGGCGCTCACGGTGTGGCCGATGACTGGGGTGAAGTCGTAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCTCCTTTTGAGCATGACGTCATCGTCCTGTCGGGCGTCCTCACAAATTACCTGCATTCAGAGATGCGTATCGGCACAGGCCGGTATGCGAAAGTCCCATCATGGGGCCTTAGCTCAGCTGGGAGAGCACCTGCTTTGCAAGCAGGGGGTCGTCGGTTCGATCCCGACAGGCTCCACCATTTGAGTGAAACGACTTTGGGTCTGTAGCTCAGGTGGTTAGAGCGCACCCCTGATAAGGGTGAGGTCGGTGGTTCGAGTCCTCCCAGACCCACCACTCTGAATGTAGTGCACACTTAAGAATTTATATGGCTCAGCGTTGAGGCTGAGACATGTTCTTTTATAACTTGTGACGTAGCGAGCGTTTGAGATATCTATCTAAACGTGTCGTTGAGGCTAAGGCGGGGACTTCGAGTCCCTAAATAATTGAGTCGTATGTTCGCGTTGGTGGCTTTGTACCCCACACAACACGGCGTATGGCCCCGAGGCAACTTGGGGTTATATGGTCAAGCGAATAAGCGCACACGGTGGATGCCTAGGCGGTCAGAGGCGATGAAGGACGTGGTAGCCTGCGAAAAGTGTCGGGGAGCTGGCAACAAGCTTTGATCCGGCAATATCCGAATGGGGAAACCCGG
"""
# Single-record FASTA fixture: one header line followed by one sequence line
# (the sequence contains two 'N' characters); each line is newline-terminated.
input_seqs2_fasta = (
    ">2855189 SLEpi20M_15561395\n"
    "TACGAAAGATCCAAGCGTTATTCGAAATGATTGGGCNTAAANAGTTTGTAGGCGGTATTTGTACTCACTTCTAAAAAACTAAGATTATCTCTTAGTATGG\n"
)
pynast_test_template_fasta2 = """>26799
-----------------------------------------------------------------------------------------------------AAATGGAGAGGTTT-GA--T-CC-T-G-GCTC-AG-GA-TGAA-C-GC--TGG-C--G-AT-A-TG--C----T-T--AACACA-T-GC-A-AGT-CGA-A-CGA---------A-T---AT-T--------------------------AAGTTTTCTTAAA--------------------------------------------------TTT-G----------------------------------------------------------------------------------TAG-AAA-------------------TT--TA-AT--ATTAG-T-GG-C-GA-A--C-------------GGG-TGAGT-A--AC-GC-G-T-A-AG---A-A--T-CT-G--C-T-TTT--GG-G------------------------------------------------------------------T-AA----AGA-AT-AA-CAA-------------------------T-T-G-----------------------GAA-A---CGA-TTG-CTAA-TA---CT-T--T-AT-A----------G----------------------------------------------------------GC-T-----------------------------------------------------------------------------------------------------------------------G-AG-G--------------------------------------------------------------------------------------------------------------------------------------A-G-T---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TAAA--G-G-T--------------------------------------------------------------------------------------------------------------------------------------TTT-A-------------------------------------------------------------------------------------------------------------------------------T--T-TCC-G--------------C----C-C---A-GA-A---AT---G-A-----G-CTT-GCG--T-CTG--A------TT--A--G-CT-A----G---TTGG-T-A-AG-A-T----AAA-AG-C-T-T-ACCA--A-GG-C-A--A-TG-A------------TCA-G-T------AG-TT-G-G-TCT-G-AG----A--GG-AT--G-AT-C-AA-CCAC-A-CTGGG--A-C-TG-A-GA-T-AC-G-G-CCCAGA-CCTT-TAC-G--G-A-G-G-GC-A-GC-A-G-TG---AG-G-A-ATT-TTCCG-C-AA-T-GG--GC-GA-A----A-G-CC-T-GA-CG-GA-GCAA-TATC-G-CG-T---G-A-A--G--GA-T-G-
-A--C-G-G-CC-----TG-TG---------G-G-T-T-G-T--A---AA-C-TTC--------TT-TT-C-T--T-AAG----AA-A--G---A--------------------A--T--TC------------------------------T-GA-CG-GT-A-C-TT-A-AG-G---------AA-----------TAAGC-ATC-GG-C-TAA---C--T-CCGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GG-AG-GAT-GCA-A-G-CG-TTAT-C-CGA-AA-TT-A--T-T--GGGC-GTA----AA-GAGT-TT--G-TA-G-G-T-G------------G--T-TT-T-T-T-AA----G-T-C-T---A-CTG-TT-A-AA-TA--TC-AGA-G--------------------------------------------------------------------CT-T-AA-------------------------------------------------------------------------CT-T-T-GA-AC-A----A-G-C-A-G-T--------A-TGA-A-A-C-T-A-ATT--A-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-T-----T-GG--TA-G-A------------G-GC-A-GA-G----GG--AACT-CTC-G-AT--GT-A-GTG-GTGAAA-TA-CGT-AGAT-A-TC-G-GGG--GG-A-AC-A-CC-AG--T--A--GC-GAA-A--G-C---G----C--T-C-TGCTG------G-GC-CA--------------------------------------------------------------TA-A-C-T--GA--CA-----CT-GA-GA--A
-A-CGA--AA-G-C--------------T-AGGG-GAG-C-A-AATA--GG-ATTA-G-ATA-C-----CC-T-A-GTA-G-T----C-CT--A-G-CTG-T-AAA--C-GATG-GA--TA-CT---------A-AG--T--A-T-TG-G-GC------------------------------------------------------------------------------------------TTTTTGAAG------------------------------------------------------------------------------------------------------------------------------------------------------TT--C-A-G-T-GT-T------GA--A----GC-TAA--CG-C-G-T--T--AA-GT--A----T-CCC-GCC-T-G-GG-GAG-TA---CGT-----T-C--G-C-A-A-GAA-T--GAA-ACTC-AAA---------GGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-ATG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-AGGAA-TT-G-AC-A-T-A--------------CTC-G-T--------------TGGTT-T-T--TT--A-GA-A-A-T--A-A-A--A-A-A-------------C-------------------------------------T--GT-T------------------------------------------A--------------AA-GAG---A--TA---------------------------------------------------C-A-G-G-T-GGTG-CA-TGG-CT--GTC-GTC-A-GC-TC---G-TG-TC-G--TGA-GA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TG--TC--T-TTAG--T-T-G-T-T---AT-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TA---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------G-A-GA-G--AC-T-G-CCG--G-T------------------------------------G-A---TAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GAGG-A-TGAC-GTC--AAGT-C---AGC-A-T-G-C-C-C-CTT----AA-G--TC-C-T-GG-GC-GA-CAC-ACGTG-C--TA--CAATG---G-TATA-G-A--C-AAA-GG-GA------------------------------------------------------------
--------------------------------------A-G-C-A-A--A-TCTG-C--G---------------------------------------A-AG-A-G-----------T--A-G-CA---A----------A--TCT-C------A---AAAAC-TATA-T-C-T-CAG-TTC--------GGA-T-TGCAG-GC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-CCTGC-A-T-G-AA-G-TC-GGAAT-CG-C-TA--G-TA-AT-C-G-C----TGG-TC-A-G-CC------AT--ACA-GC-G-GT-G-AAT-ATGT-T-CTCGGGCCT-TGTA----CACACCG-CCC-ATC-----A---CG--CTC-GA-GA-A--A---TTG-G-AA-AT-ACC--C-AAA------G--T-CAT-CA-T-T-C-T-AA-CCATATT---------------------------------------------------------T-TT-T---------------------------------------------------------------------------------------------------G---G-A--AG-A--T---AA-TGC--CA--AAG-G----T-AGA-GC-TAG------------------------TG--ACT-CAAG-CG-AAG-TTGTAACAA-GGTAA-CCGT-ACTGGAA-GGTG-CGGT-TGGATCACCTCCTTA----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
"""
# Script entry point: call main() (defined or imported elsewhere in this
# file) only when the module is executed directly, never on import.
if __name__ == "__main__":
    main()