pax_global_header 0000666 0000000 0000000 00000000064 14244530626 0014520 g ustar 00root root 0000000 0000000 52 comment=6c601163998616bb88991931e443c645858e162c
muscle-5.1.0/ 0000775 0000000 0000000 00000000000 14244530626 0013013 5 ustar 00root root 0000000 0000000 muscle-5.1.0/.gitattributes 0000664 0000000 0000000 00000004726 14244530626 0015717 0 ustar 00root root 0000000 0000000 ###############################################################################
# Set default behavior to automatically normalize line endings.
###############################################################################
* text=auto
###############################################################################
# Set default behavior for command prompt diff.
#
# This is needed for earlier builds of msysgit that do not have it on by
# default for csharp files.
# Note: This is only used by command line
###############################################################################
#*.cs diff=csharp
###############################################################################
# Set the merge driver for project and solution files
#
# Merging from the command prompt will add diff markers to the files if there
# are conflicts (Merging from VS is not affected by the settings below, in VS
# the diff markers are never inserted). Diff markers may cause the following
# file extensions to fail to load in VS. An alternative would be to treat
# these files as binary and thus will always conflict and require user
# intervention with every merge. To do so, just uncomment the entries below
###############################################################################
#*.sln merge=binary
#*.csproj merge=binary
#*.vbproj merge=binary
#*.vcxproj merge=binary
#*.vcproj merge=binary
#*.dbproj merge=binary
#*.fsproj merge=binary
#*.lsproj merge=binary
#*.wixproj merge=binary
#*.modelproj merge=binary
#*.sqlproj merge=binary
#*.wwaproj merge=binary
###############################################################################
# behavior for image files
#
# image files are treated as binary by default.
###############################################################################
#*.jpg binary
#*.png binary
#*.gif binary
###############################################################################
# diff behavior for common document formats
#
# Convert binary document formats to text before diffing them. This feature
# is only available from the command line. Turn it on by uncommenting the
# entries below.
###############################################################################
#*.doc diff=astextplain
#*.DOC diff=astextplain
#*.docx diff=astextplain
#*.DOCX diff=astextplain
#*.dot diff=astextplain
#*.DOT diff=astextplain
#*.pdf diff=astextplain
#*.PDF diff=astextplain
#*.rtf diff=astextplain
#*.RTF diff=astextplain
muscle-5.1.0/.github/ 0000775 0000000 0000000 00000000000 14244530626 0014353 5 ustar 00root root 0000000 0000000 muscle-5.1.0/.github/workflows/ 0000775 0000000 0000000 00000000000 14244530626 0016410 5 ustar 00root root 0000000 0000000 muscle-5.1.0/.github/workflows/build_linux.yml 0000664 0000000 0000000 00000001273 14244530626 0021454 0 ustar 00root root 0000000 0000000 name: build_linux
on:
# Enable manual run from the Actions tab
workflow_dispatch:
jobs:
build:
runs-on: ubuntu-20.04
steps:
# Checks-out the repository under $GITHUB_WORKSPACE
- uses: actions/checkout@v2
# Runs a set of commands using the runner's shell
- name: Build-commands
run: |
echo Starting Build-commands
echo GITHUB_WORKSPACE=$GITHUB_WORKSPACE
uname -a
cd $GITHUB_WORKSPACE
ls -lh
cd src
make LDFLAGS2=-static
- name: Upload binary artifact
uses: actions/upload-artifact@v2
with:
name: muscle-linux-binary
path: src/Linux/muscle
muscle-5.1.0/.github/workflows/build_osx.yml 0000664 0000000 0000000 00000001335 14244530626 0021125 0 ustar 00root root 0000000 0000000 name: build_osx
on:
# Enable manual run from the Actions tab
workflow_dispatch:
jobs:
# This workflow contains a single job called "build"
build:
runs-on: macos-10.15
steps:
# Checks-out the repository under $GITHUB_WORKSPACE
- uses: actions/checkout@v2
# Runs a set of commands using the runner's shell
- name: Build-commands
run: |
echo Starting Build-commands
echo GITHUB_WORKSPACE=$GITHUB_WORKSPACE
uname -a
cd $GITHUB_WORKSPACE
ls -lh
cd src
make
- name: Upload binary artifact
uses: actions/upload-artifact@v2
with:
name: muscle-osx-binary
path: src/Darwin/muscle
muscle-5.1.0/.github/workflows/build_windows.yml 0000664 0000000 0000000 00000001151 14244530626 0022002 0 ustar 00root root 0000000 0000000 name: build_windows
on:
workflow_dispatch
jobs:
build:
runs-on: windows-2022
steps:
- uses: actions/checkout@v2
- uses: actions/setup-dotnet@v1
- name: Build
run: |
cd src
$msbuild = "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Msbuild\Current\Bin\MSBuild.exe"
$a = @("muscle.sln", "/p:Platform=x64", "/p:Configuration=Release")
& $msbuild $a
- name: Upload binary artifact
uses: actions/upload-artifact@v2
with:
name: muscle-windows-exe
path: src/x64/Release/muscle.exe
muscle-5.1.0/.gitignore 0000664 0000000 0000000 00000007110 14244530626 0015002 0 ustar 00root root 0000000 0000000 ## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
# User-specific files
*.rsuser
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Mono auto generated files
mono_crash.*
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
[Ww][Ii][Nn]32/
[Aa][Rr][Mm]/
[Aa][Rr][Mm]64/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
[Ll]ogs/
# Visual Studio 2015/2017 cache/options directory
.vs/
__pycache__/
*.o
*.pyc
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Tabs Studio
*.tss
# Telerik's JustMock configuration file
*.jmconfig
# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs
# OpenCover UI analysis results
OpenCover/
# Azure Stream Analytics local run output
ASALocalRun/
# MSBuild Binary and Structured Log
*.binlog
# NVidia Nsight GPU debugger configuration file
*.nvuser
# MFractors (Xamarin productivity tool) working folder
.mfractor/
# Local History for Visual Studio
.localhistory/
# Visual Studio History (VSHistory) files
.vshistory/
# BeatPulse healthcheck temp database
healthchecksdb
# Backup folder for Package Reference Convert tool in Visual Studio 2017
MigrationBackup/
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# Visual Studio Trace Files
*.e2e
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# TeamCity is a build add-in
_TeamCity*
# DotCover
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!?*.[Cc]ache/
# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs
# Including strong name files can present a security risk
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
#*.snk
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
ServiceFabricBackup/
*.rptproj.bak
# SQL Server files
*.mdf
*.ldf
*.ndf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.sep/
# Ionide (cross platform F# VS Code tools) working folder
.ionide/
# Fody - auto-generated XML schema
FodyWeavers.xsd
# VS Code files for those working on multiple tools
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
# Local History for Visual Studio Code
.history/
# Windows Installer files from build outputs
*.cab
*.msi
*.msix
*.msm
*.msp
# JetBrains Rider
*.sln.iml
# Build results of an ATL project
[Rr]eleasePS/
dlldata.c
# Benchmark Results
BenchmarkDotNet.Artifacts/
# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
# ASP.NET Scaffolding
ScaffoldingReadMe.txt
# StyleCop
StyleCopReport.xml
# Files built by Visual Studio
*_i.c
*_p.c
*_h.h
*.ilk
*.meta
*.obj
*.iobj
*.pch
*.pdb
*.ipdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*_wpftmp.csproj
*.log
*.tlog
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Per-platform build output directories (CI collects binaries from src/Linux and src/Darwin) and gitver.txt
CYGWIN*/
[lL]inux/
Darwin/
gitver.txt
muscle-5.1.0/CONTRIBUTING.md 0000664 0000000 0000000 00000000112 14244530626 0015236 0 ustar 00root root 0000000 0000000 Please see https://github.com/rcedgar/muscle/wiki/Contributing-to-MUSCLE.
muscle-5.1.0/LICENSE 0000664 0000000 0000000 00000104515 14244530626 0014026 0 ustar 00root root 0000000 0000000 GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc.
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
<program> Copyright (C) <year> <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.
muscle-5.1.0/README.md 0000664 0000000 0000000 00000002353 14244530626 0014275 0 ustar 00root root 0000000 0000000 
MUSCLE is widely-used software for making multiple alignments of biological sequences.
Version 5 of MUSCLE achieves highest scores on Balibase, Bralibase and Balifam benchmark tests and scales to thousands of sequences on a commodity desktop computer.
This version supports generating an ensemble of alternative alignments with the same high accuracy obtained with default parameters. By comparing downstream predictions from different alignments, such as trees, a biologist can evaluate the robustness of conclusions against alignment errors.
### Downloads
Binary files are self-contained, no dependencies.
https://github.com/rcedgar/muscle/releases
### Documentation
[Muscle v5 home page](https://drive5.com/muscle5)
[Manual](https://drive5.com/muscle5/manual)
### Building MUSCLE from source
[https://github.com/rcedgar/muscle/wiki/Building-MUSCLE](https://github.com/rcedgar/muscle/wiki/Building-MUSCLE)
### Reference
R.C. Edgar (2021) "MUSCLE v5 enables improved estimates of phylogenetic tree confidence by ensemble bootstrapping"
[https://www.biorxiv.org/content/10.1101/2021.06.20.449169v1.full.pdf](https://www.biorxiv.org/content/10.1101/2021.06.20.449169v1.full.pdf)
muscle-5.1.0/src/ 0000775 0000000 0000000 00000000000 14244530626 0013602 5 ustar 00root root 0000000 0000000 muscle-5.1.0/src/Makefile 0000664 0000000 0000000 00000002615 14244530626 0015246 0 ustar 00root root 0000000 0000000 # The $(OS) variable is the o/s name returned by uname, which is
# used as the sub-directory name under src/ where object files
# and the executable are stored. This allows several target
# operating systems in the same directory structure.
# Typical values are:
# Platform Value of $(OS)
# -------- --------------
# Linux Linux
# Mac OSX Darwin
# Cygwin CYGWIN_NT-10.0
# Building on Mac OSX is challenging because Apple does not support gcc or
# the OMP threading library. Hacks to install gcc and OMP vary by OSX release.
# This Makefile works with the AWS Catalina v10.15.7 AMI. With this AMI,
# running 'brew install gcc' currently installs gcc v11.
# Object files and the executable go into a sub-directory named after the
# value printed by uname.
OS := $(shell uname)

CPPFLAGS := $(CPPFLAGS) -DNDEBUG -pthread

CXX := g++
ifeq ($(OS),Darwin)
CXX := g++-11
endif

CXXFLAGS := $(CXXFLAGS) -O3 -fopenmp -ffast-math
LDFLAGS := $(LDFLAGS) -O3 -fopenmp -pthread -lpthread ${LDFLAGS2}

HDRS := $(shell echo *.h)
OBJS := $(shell echo *.cpp | sed "-es/^/$(OS)\//" | sed "-es/ / $(OS)\//g" | sed "-es/\.cpp/.o/g")
SRCS := $(shell ls *.cpp *.h)

.PHONY: clean

# The output directory is an order-only prerequisite (after the '|'): it must
# exist before linking, but its timestamp, which changes whenever a file is
# added to it, must not trigger a relink.
# Warning: do not add -d option to strip, this is not portable
$(OS)/muscle : gitver.txt $(OBJS) | $(OS)/
	$(CXX) $(LDFLAGS) $(OBJS) -o $@
	strip $(OS)/muscle

gitver.txt : $(SRCS)
	bash ./gitver.bash

$(OS)/ :
	mkdir -p $(OS)/

# Every object needs the output directory to exist first; without this
# order-only prerequisite a parallel build (make -j) can start compiling
# before mkdir has run.
$(OS)/%.o : %.cpp $(HDRS) | $(OS)/
	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<

clean:
	rm -rf gitver.txt $(OS)/
muscle-5.1.0/src/addconfseq.cpp 0000664 0000000 0000000 00000004215 14244530626 0016417 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "ensemble.h"
#include "qscorer.h"
static char ConfToChar1(double Conf)
{
asserta(Conf >= 0 && Conf <= 1);
uint Tenth = uint(Conf*10);
asserta(Tenth >= 0 && Tenth <= 10);
if (Tenth == 10)
return '+';
return '0' + Tenth;
}
static char ConfToChar2(double Conf)
{
asserta(Conf >= 0 && Conf <= 1);
uint H = uint(Conf*100);
asserta(H >= 0 && H <= 100);
if (H == 100)
return '+';
return '0' + H%10;
}
// Write one FASTA-style record (">label" line followed by one data line) to
// fOut. The data line holds one character per column of alignment MSAIndex,
// derived from E.GetConf_MSACol().
//   Dec == 1 encodes each column with ConfToChar1,
//   Dec == 2 encodes each column with ConfToChar2,
//   any other value trips an assertion on the first column.
static void Do1(FILE *fOut, const Ensemble &E, uint MSAIndex,
  const string &ConfLabel, int Dec)
	{
	const MSA &M = E.GetMSA(MSAIndex);
	const uint ColCount = M.GetColCount();
	string ConfSeq;
	for (uint Col = 0; Col != ColCount; ++Col)
		{
		const double Conf = E.GetConf_MSACol(MSAIndex, Col);
		char c = '?';
		if (Dec == 1)
			{
			c = ConfToChar1(Conf);
			}
		else if (Dec == 2)
			{
			c = ConfToChar2(Conf);
			}
		else
			{
			asserta(false);
			}
		ConfSeq.push_back(c);
		}
	Pf(fOut, ">%s\n", ConfLabel.c_str());
	Pf(fOut, "%s\n", ConfSeq.c_str());
	}
void cmd_addconfseq()
{
const string &InputFileName = opt(addconfseq);
const string RefFileName = opt(ref);
const string &OutputFileName = opt(output);
string ConfLabel = "_conf_";
if (optset_label)
ConfLabel = opt(label);
MSA Ref;
if (optset_ref)
Ref.FromFASTAFile_PreserveCase(RefFileName);
Ensemble E;
E.FromFile(InputFileName);
if (optset_ref)
E.SortMSA(Ref);
FILE *fOut = CreateStdioFile(OutputFileName);
const uint MSACount = E.GetMSACount();
const uint SeqCount = E.GetSeqCount();
for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
{
const string &MSAName = E.GetMSAName(MSAIndex);
Pf(fOut, "<%s\n", MSAName.c_str());
const MSA &M = E.GetMSA(MSAIndex);
const uint ColCount = M.GetColCount();
const uint MSASeqCount = M.GetSeqCount();
asserta(MSASeqCount == SeqCount);
Do1(fOut, E, MSAIndex, ConfLabel, 1);
Do1(fOut, E, MSAIndex, ConfLabel + "2", 2);
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
{
const char *S = M.m_szSeqs[SeqIndex];
const char *Label = M.m_szNames[SeqIndex];
Pf(fOut, ">%s\n", Label);
Pf(fOut, "%*.*s\n", ColCount, ColCount, S);
}
}
CloseStdioFile(fOut);
}
muscle-5.1.0/src/align.cpp 0000664 0000000 0000000 00000007355 14244530626 0015412 0 ustar 00root root 0000000 0000000 #include "muscle.h"
void MakeReplicateFileName(const string &Pattern, TREEPERM TP,
uint PerturbSeed, string &FileName)
{
FileName.clear();
size_t pos = Pattern.find('@');
if (pos == string::npos)
Die("'@' not found in '%s'", Pattern.c_str());
for (size_t i = 0; i < pos; ++i)
FileName += Pattern[i];
Psa(FileName, "%s.%u", TREEPERMToStr(TP), PerturbSeed);
for (size_t i = pos+1; i < SIZE(Pattern); ++i)
FileName += Pattern[i];
}
// Run one alignment of InputSeqs with M and write it to fOut in MFA format.
// Does nothing when fOut is null.
//   PerturbSeed  when > 0, reseeds the random generator and perturbs the
//                HMM probabilities with this seed
//   TP           stored in M.m_TreePerm before the run
//   WriteEfaHdr  when true, a "<perm.seed" line is written before the MSA
static void Align(MPCFlat &M, MultiSequence &InputSeqs,
  uint PerturbSeed, TREEPERM TP, bool WriteEfaHdr, FILE *fOut)
	{
	if (fOut == 0)
		return;

	const bool Nucleo = (g_Alpha == ALPHA_Nucleo);
	HMMParams HP;
	HP.FromDefaults(Nucleo);
	if (PerturbSeed > 0)
		{
		ResetRand(PerturbSeed);
		HP.PerturbProbs(PerturbSeed);
		}
	HP.ToPairHMM();

	M.m_TreePerm = TP;
	M.Run(&InputSeqs);
	asserta(M.m_MSA != 0);

	if (WriteEfaHdr)
		fprintf(fOut, "<%s.%u\n", TREEPERMToStr(TP), PerturbSeed);
	M.m_MSA->WriteMFA(fOut);
	}
// Command handler for -align.
// Loads the input sequences, selects the nucleotide or amino alphabet, and
// produces one or more replicate alignments. If -output contains '@', each
// replicate is written to its own file (see MakeReplicateFileName);
// otherwise all replicates go to a single file, each preceded by a
// "<perm.seed" header line when there is more than one replicate.
void cmd_align()
	{
	MultiSequence InputSeqs;
	InputSeqs.LoadMFA(opt(align), true);
	const uint InputSeqCount = InputSeqs.GetSeqCount();

	const string &OutputPattern = opt(output);
	if (OutputPattern.empty())
		Die("Must set -output");

	const double MeanSeqLength = InputSeqs.GetMeanSeqLength();
	const uint MaxSeqLength = InputSeqs.GetMaxSeqLength();
	ProgressLog("Input: %u seqs, avg length %.0f, max %u\n\n",
	  InputSeqCount, MeanSeqLength, MaxSeqLength);
	if (InputSeqCount > 1000)
		Warning(">1k sequences, may be slow or use excessive memory, consider using -super5");

	const bool OutputWildcard = (OutputPattern.find('@') != string::npos);

	const bool IsNucleo = InputSeqs.GuessIsNucleo();
	SetAlpha(IsNucleo ? ALPHA_Nucleo : ALPHA_Amino);

	MPCFlat M;
	if (optset_consiters)
		M.m_ConsistencyIterCount = opt(consiters);
	if (optset_refineiters)
		M.m_RefineIterCount = opt(refineiters);

	// Option sanity checks.
	if (opt(stratified) && opt(diversified))
		Die("Cannot set both -stratified and -diversified");
	if ((opt(stratified) || opt(diversified)) &&
	  (optset_perm || optset_perturb))
		Die("Cannot set -perm or -perturb with -stratified or -diversified");

	// Default replicate counts; an explicit -replicates overrides them.
	uint RepCount = 1;
	if (opt(stratified))
		RepCount = 4;
	else if (opt(diversified))
		RepCount = 100;
	if (optset_replicates)
		RepCount = opt(replicates);

	FILE *fOut = 0;

	// Single alignment: no header line is written.
	if (RepCount == 1)
		{
		uint PerturbSeed = 0;
		if (optset_perturb)
			PerturbSeed = opt(perturb);

		TREEPERM TP = TP_None;
		if (optset_perm)
			TP = StrToTREEPERM(opt(perm));
		if (TP == TP_All)
			Die("-perm all not supported, use -stratified");

		string OutputFileName;
		if (OutputWildcard)
			MakeReplicateFileName(OutputPattern, TP, PerturbSeed, OutputFileName);
		else
			OutputFileName = OutputPattern;

		fOut = CreateStdioFile(OutputFileName);
		Align(M, InputSeqs, PerturbSeed, TP, false, fOut);
		CloseStdioFile(fOut);
		return;
		}

	// Stratified: every seed is used with each of four tree permutations,
	// so the replicate count is multiplied by 4.
	const bool Stratified = (opt(stratified) ? true : false);
	if (Stratified)
		{
		RepCount *= 4;
		if (optset_perm)
			Die("Cannot set both -perm and -stratified");
		asserta(RepCount > 0);
		}

	string OutputFileName;
	if (!OutputWildcard)
		fOut = CreateStdioFile(OutputPattern);

	for (uint RepIndex = 0; RepIndex < RepCount; ++RepIndex)
		{
		const uint PerturbSeed = (Stratified ? RepIndex/4 : RepIndex);

		// Without -perm, the permutation cycles with the replicate index.
		TREEPERM TP = TREEPERM(RepIndex%4);
		if (optset_perm)
			TP = StrToTREEPERM(opt(perm));

		ProgressLog("Replicate %u/%u, %s.%u\n",
		  RepIndex+1, RepCount, TREEPERMToStr(TP), PerturbSeed);

		if (OutputWildcard)
			{
			// One file per replicate, no header line.
			MakeReplicateFileName(OutputPattern, TP, PerturbSeed, OutputFileName);
			fOut = CreateStdioFile(OutputFileName);
			Align(M, InputSeqs, PerturbSeed, TP, false, fOut);
			CloseStdioFile(fOut);
			}
		else
			{
			// Shared file: a header line separates the replicates.
			Align(M, InputSeqs, PerturbSeed, TP, true, fOut);
			}
		}

	if (!OutputWildcard)
		CloseStdioFile(fOut);
	}
muscle-5.1.0/src/alignpairflat.cpp 0000664 0000000 0000000 00000002044 14244530626 0017123 0 ustar 00root root 0000000 0000000 #include "muscle.h"
float AlignPairFlat_SparsePost(const Sequence *Seq1, const Sequence *Seq2,
string &Path, MySparseMx *SparsePost)
{
InitProbcons();
uint L1 = Seq1->GetLength();
uint L2 = Seq2->GetLength();
asserta(L1 > 0);
asserta(L2 > 0);
const byte *ByteSeq1 = Seq1->GetBytePtr();
const byte *ByteSeq2 = Seq2->GetBytePtr();
float *Fwd = AllocFB(L1, L2);
float *Bwd = AllocFB(L1, L2);
float *Post = AllocPost(L1, L2);
CalcFwdFlat(ByteSeq1, L1, ByteSeq2, L2, Fwd);
CalcBwdFlat(ByteSeq1, L1, ByteSeq2, L2, Bwd);
CalcPostFlat(Fwd, Bwd, L1, L2, Post);
delete Fwd;
delete Bwd;
float *DPRows = AllocDPRows(L1, L2);
char *TB = AllocTB(L1, L2);
float Score = CalcAlnFlat(Post, L1, L2, DPRows, TB, Path);
if (SparsePost != 0)
SparsePost->FromPost(Post, L1, L2);
delete Post;
delete DPRows;
delete TB;
asserta(L1 > 0 && L2 > 0);
float EA = Score/min(L1, L2);
return EA;
}
// Convenience wrapper around AlignPairFlat_SparsePost that does not request
// a sparse posterior matrix. Returns that function's result unchanged.
float AlignPairFlat(const Sequence *Seq1, const Sequence *Seq2, string &Path)
	{
	return AlignPairFlat_SparsePost(Seq1, Seq2, Path, 0);
	}
muscle-5.1.0/src/allocflat.cpp 0000664 0000000 0000000 00000002150 14244530626 0016245 0 ustar 00root root 0000000 0000000 #include "muscle.h"
// Number of elements in a forward/backward matrix for sequence lengths
// LX and LY: (LX+1)*(LY+1)*HMMSTATE_COUNT.
// Calls Die() when that exceeds 4e9 elements.
uint64 GetFBSize(uint LX, uint LY)
	{
	// Do the limit check in floating point so that neither a 32-bit wrap of
	// LX + 1 nor a 64-bit overflow of the product can hide an oversized
	// request.
	const double Cells = (double(LX) + 1)*(double(LY) + 1)*double(HMMSTATE_COUNT);
	if (Cells > 4e9)
		Die("Memory object too large due to sequence lengths %u, %u", LX, LY);

	// Widen before adding 1; the product is at most 4e9 here, so it fits.
	const uint64 Size64 = (uint64(LX) + 1)*(uint64(LY) + 1)*HMMSTATE_COUNT;
	const uint Size = uint(Size64);
	// Verify the narrowing lost nothing (same check as the sibling Get*Size
	// functions; the previous form compared the value with itself).
	asserta(uint64(Size) == Size64);
	return Size;
	}
// Number of elements in a posterior matrix for lengths LX and LY: LX*LY.
// Asserts that the product fits in a 32-bit unsigned value.
uint64 GetPostSize(uint LX, uint LY)
	{
	const uint64 Size64 = uint64(LX)*uint64(LY);
	const uint Size = static_cast<uint>(Size64);
	asserta(uint64(Size) == Size64);
	return uint64(Size);
	}
// Number of elements in the DP row buffer: two rows of LY+1 elements.
// LX is not used. Asserts that the result fits in a 32-bit unsigned value.
uint64 GetDPRowsSize(uint LX, uint LY)
	{
	// Widen before adding 1 so LY == UINT_MAX cannot wrap to zero.
	const uint64 Size64 = 2*(uint64(LY) + 1);
	const uint Size = uint(Size64);
	asserta(uint64(Size) == Size64);
	return Size;
	}
// Number of elements in the traceback matrix: (LX+1)*(LY+1).
// Asserts that the result fits in a 32-bit unsigned value.
uint64 GetTBSize(uint LX, uint LY)
	{
	// Widen before adding 1 so a length of UINT_MAX cannot wrap to zero.
	const uint64 Rows = uint64(LX) + 1;
	const uint64 Cols = uint64(LY) + 1;
	const uint64 Size64 = Rows*Cols;
	// Rows >= 1, so the division is safe; it catches the one remaining
	// 64-bit wrap (both factors equal to 2^32).
	asserta(Size64/Rows == Cols);
	const uint Size = uint(Size64);
	asserta(uint64(Size) == Size64);
	return Size;
	}
// Allocate a forward/backward matrix of GetFBSize(LX, LY) floats.
// Sibling code in this code base releases such buffers with myfree().
float *AllocFB(uint LX, uint LY)
	{
	const uint64 ElemCount = GetFBSize(LX, LY);
	float *Buffer = myalloc64(float, ElemCount);
	return Buffer;
	}
// Allocate a posterior matrix of GetPostSize(LX, LY) floats.
// Sibling code in this code base releases such buffers with myfree().
float *AllocPost(uint LX, uint LY)
	{
	const uint64 ElemCount = GetPostSize(LX, LY);
	float *Buffer = myalloc64(float, ElemCount);
	return Buffer;
	}
// Allocate the DP row buffer of GetDPRowsSize(LX, LY) floats.
// Sibling code in this code base releases such buffers with myfree().
float *AllocDPRows(uint LX, uint LY)
	{
	const uint64 ElemCount = GetDPRowsSize(LX, LY);
	float *Buffer = myalloc64(float, ElemCount);
	return Buffer;
	}
// Allocate the traceback matrix of GetTBSize(LX, LY) chars.
// Sibling code in this code base releases such buffers with myfree().
char *AllocTB(uint LX, uint LY)
	{
	const uint64 ElemCount = GetTBSize(LX, LY);
	char *Buffer = myalloc64(char, ElemCount);
	return Buffer;
	}
muscle-5.1.0/src/alnalnsflat.cpp 0000664 0000000 0000000 00000002315 14244530626 0016606 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "mpcflat.h"
float CalcAlnFlat(const float *Post, uint LX, uint LY,
float *DPRows, char *TB, string &Path);
// Align two alignments column-to-column and return the merged alignment.
// A posterior matrix over the columns of MSA1 and MSA2 is built with
// BuildPost, CalcAlnFlat derives the path, and every input row is then
// re-gapped along that path ('X' for rows of MSA1, 'Y' for rows of MSA2).
// The result is allocated with new; rows of MSA1 come first.
MultiSequence *MPCFlat::AlignAlns(const MultiSequence &MSA1,
  const MultiSequence &MSA2)
	{
	const uint SeqCount1 = MSA1.GetSeqCount();
	const uint SeqCount2 = MSA2.GetSeqCount();
	const uint ColCount1 = MSA1.GetColCount();
	const uint ColCount2 = MSA2.GetColCount();

	float *Post = AllocPost(ColCount1, ColCount2);
	BuildPost(MSA1, MSA2, Post);

	float *DPRows = AllocDPRows(ColCount1, ColCount2);
	char *TB = AllocTB(ColCount1, ColCount2);
	string Path;
	CalcAlnFlat(Post, ColCount1, ColCount2, DPRows, TB, Path);

	// Only Path is needed from here on.
	myfree(Post);
	myfree(DPRows);
	myfree(TB);

	MultiSequence *result = new MultiSequence();
	for (uint i = 0; i != SeqCount1; ++i)
		{
		const Sequence *Row = MSA1.GetSequence(i);
		result->AddSequence(Row->AddGapsPath(Path, 'X'), true);
		}
	for (uint j = 0; j != SeqCount2; ++j)
		{
		const Sequence *Row = MSA2.GetSequence(j);
		result->AddSequence(Row->AddGapsPath(Path, 'Y'), true);
		}
	return result;
	}
muscle-5.1.0/src/alnmsasflat.cpp 0000664 0000000 0000000 00000002377 14244530626 0016624 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "locallock.h"
float AlignMSAsFlat(const string &ProgressStr,
const MultiSequence &MSA1, const MultiSequence &MSA2,
uint TargetPairCount, string &Path)
{
const uint SeqCount1 = MSA1.GetNumSequences();
const uint SeqCount2 = MSA2.GetNumSequences();
asserta(SeqCount1 > 0);
asserta(SeqCount2 > 0);
asserta(MSA1.IsAligned());
asserta(MSA2.IsAligned());
const uint ColCount1 = MSA1.GetColCount();
const uint ColCount2 = MSA2.GetColCount();
vector SeqIndexes1;
vector SeqIndexes2;
GetPairs(SeqCount1, SeqCount2, TargetPairCount,
SeqIndexes1, SeqIndexes2);
const uint PairCount = SIZE(SeqIndexes1);
asserta(SIZE(SeqIndexes2) == PairCount);
vector SparseMxs;
float AvgEA = GetPostPairsAlignedFlat(ProgressStr, MSA1, MSA2,
SeqIndexes1, SeqIndexes2, SparseMxs);
const uint L1 = ColCount1;
const uint L2 = ColCount2;
float *Post = AllocPost(L1, L2);
CalcPosteriorFlat3(MSA1, MSA2, SeqIndexes1, SeqIndexes2, SparseMxs, Post);
for (uint i = 0; i < PairCount; ++i)
delete SparseMxs[i];
SparseMxs.clear();
float *DPRows = AllocDPRows(L1, L2);
char *TB = AllocTB(L1, L2);
CalcAlnFlat(Post, ColCount1, ColCount2, DPRows, TB, Path);
delete Post;
delete DPRows;
delete TB;
return AvgEA;
}
muscle-5.1.0/src/alnmsasflat3.cpp 0000664 0000000 0000000 00000002460 14244530626 0016700 0 ustar 00root root 0000000 0000000 #include "muscle.h"
float AlignMSAsFlat3(const string &ProgressStr,
const MultiSequence &MSA1, const MultiSequence &MSA2,
const vector &SparseMxVec,
uint Index1, uint Index2,
uint TargetPairCount, string &Path)
{
const uint SeqCount1 = MSA1.GetNumSequences();
const uint SeqCount2 = MSA2.GetNumSequences();
asserta(SeqCount1 > 0);
asserta(SeqCount2 > 0);
asserta(MSA1.IsAligned());
asserta(MSA2.IsAligned());
const uint ColCount1 = MSA1.GetColCount();
const uint ColCount2 = MSA2.GetColCount();
vector SeqIndexes1;
vector SeqIndexes2;
GetPairs(SeqCount1, SeqCount2, TargetPairCount,
SeqIndexes1, SeqIndexes2);
const uint PairCount = SIZE(SeqIndexes1);
asserta(SIZE(SeqIndexes2) == PairCount);
vector SparseMxs;
float AvgEA = GetPostPairsAlignedFlat(ProgressStr, MSA1, MSA2,
SeqIndexes1, SeqIndexes2, SparseMxs);
const uint L1 = ColCount1;
const uint L2 = ColCount2;
float *Post = AllocPost(L1, L2);
CalcPosteriorFlat3(MSA1, MSA2, SeqIndexes1, SeqIndexes2, SparseMxs, Post);
for (uint i = 0; i < PairCount; ++i)
delete SparseMxs[i];
SparseMxs.clear();
float *DPRows = AllocDPRows(L1, L2);
char *TB = AllocTB(L1, L2);
CalcAlnFlat(Post, ColCount1, ColCount2, DPRows, TB, Path);
delete Post;
delete DPRows;
delete TB;
return AvgEA;
}
muscle-5.1.0/src/alpha.cpp 0000664 0000000 0000000 00000013232 14244530626 0015374 0 ustar 00root root 0000000 0000000 #include "muscle.h"
/***
From Bioperl docs:
Extended DNA / RNA alphabet
------------------------------------------
Symbol Meaning Nucleic Acid
------------------------------------------
A A Adenine
C C Cytosine
G G Guanine
T T Thymine
U U Uracil
M A or C
R A or G
W A or T
S C or G
Y C or T
K G or T
V A or C or G
H A or C or T
D A or G or T
B C or G or T
X G or A or T or C
N G or A or T or C
IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE:
Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.
***/
// Lookup tables for the active alphabet. SetAlpha() refills them through
// InitArrays(), SetGapChar() and the Res/Wild macros below.

// Character -> letter code; written by Res only (InitArrays fills every
// byte with 0xff first).
unsigned g_CharToLetter[MAX_CHAR];
// Character -> extended letter code; written by Res, Wild and SetGapChar.
unsigned g_CharToLetterEx[MAX_CHAR];
// Letter code -> upper-case character ('?' where nothing was registered).
char g_LetterToChar[MAX_ALPHA];
// Extended letter code -> character ('?' where nothing was registered).
char g_LetterExToChar[MAX_ALPHA_EX];
// Character -> its lower-case form (a gap character maps to itself).
char g_UnalignChar[MAX_CHAR];
// Character -> its upper-case form (a gap character maps to itself).
char g_AlignChar[MAX_CHAR];
// True for characters registered through Wild.
bool g_IsWildcardChar[MAX_CHAR];
// True for characters registered through Res or Wild.
bool g_IsResidueChar[MAX_CHAR];
// Currently selected alphabet and its size, set at the end of SetAlpha().
ALPHA g_Alpha = ALPHA_Undefined;
unsigned g_AlphaSize = 0;
// Res(c, Letter): register character c, in both upper and lower case, as a
// residue with code Letter. Fills both the plain and the extended char<->letter
// tables, marks the character as a residue, and records its aligned
// (upper-case) and unaligned (lower-case) forms.
// Expands to a braced block, so call sites are written without a semicolon.
#define Res(c, Letter) \
{ \
const unsigned char Upper = (unsigned char) toupper(c); \
const unsigned char Lower = (unsigned char) tolower(c); \
g_CharToLetter[Upper] = Letter; \
g_CharToLetter[Lower] = Letter; \
g_CharToLetterEx[Upper] = Letter; \
g_CharToLetterEx[Lower] = Letter; \
g_LetterToChar[Letter] = Upper; \
g_LetterExToChar[Letter] = Upper; \
g_IsResidueChar[Upper] = true; \
g_IsResidueChar[Lower] = true; \
g_AlignChar[Upper] = Upper; \
g_AlignChar[Lower] = Upper; \
g_UnalignChar[Upper] = Lower; \
g_UnalignChar[Lower] = Lower; \
}
// Wild(c, Letter): register character c, in both upper and lower case, as a
// wildcard with extended code Letter. Unlike Res, only the extended
// char<->letter tables are written (g_CharToLetter and g_LetterToChar are
// left untouched), and the character is additionally flagged in
// g_IsWildcardChar. It is also flagged in g_IsResidueChar.
// Expands to a braced block, so call sites are written without a semicolon.
#define Wild(c, Letter) \
{ \
const unsigned char Upper = (unsigned char) toupper(c); \
const unsigned char Lower = (unsigned char) tolower(c); \
g_CharToLetterEx[Upper] = Letter; \
g_CharToLetterEx[Lower] = Letter; \
g_LetterExToChar[Letter] = Upper; \
g_IsResidueChar[Upper] = true; \
g_IsResidueChar[Lower] = true; \
g_AlignChar[Upper] = Upper; \
g_AlignChar[Lower] = Upper; \
g_UnalignChar[Upper] = Lower; \
g_UnalignChar[Lower] = Lower; \
g_IsWildcardChar[Lower] = true; \
g_IsWildcardChar[Upper] = true; \
}
// Number of letters in an alphabet: 20 for amino, 4 for nucleotide.
// Calls Die() for any other value.
static unsigned GetAlphaSize(ALPHA Alpha)
	{
	if (Alpha == ALPHA_Amino)
		return 20;
	if (Alpha == ALPHA_Nucleo)
		return 4;
	Die("Invalid Alpha=%d", Alpha);
	return 0;
	}
static void InitArrays()
{
memset(g_CharToLetter, 0xff, sizeof(g_CharToLetter));
memset(g_CharToLetterEx, 0xff, sizeof(g_CharToLetterEx));
memset(g_LetterToChar, '?', sizeof(g_LetterToChar));
memset(g_LetterExToChar, '?', sizeof(g_LetterExToChar));
memset(g_AlignChar, '?', sizeof(g_UnalignChar));
memset(g_UnalignChar, '?', sizeof(g_UnalignChar));
memset(g_IsWildcardChar, 0, sizeof(g_IsWildcardChar));
}
// Register c as a gap character: it maps to extended code AX_GAP, becomes
// the character reported for AX_GAP, and is its own aligned and unaligned
// form.
static void SetGapChar(char c)
	{
	const unsigned char GapByte = (unsigned char) c;
	g_AlignChar[GapByte] = GapByte;
	g_UnalignChar[GapByte] = GapByte;
	g_LetterExToChar[AX_GAP] = GapByte;
	g_CharToLetterEx[GapByte] = AX_GAP;
	}
static void SetAlphaNucleo()
{
Res('A', NX_A)
Res('C', NX_C)
Res('G', NX_G)
Res('T', NX_T)
Res('U', NX_T)
Wild('M', NX_M)
Wild('R', NX_R)
Wild('W', NX_W)
Wild('S', NX_S)
Wild('Y', NX_Y)
Wild('K', NX_K)
Wild('V', NX_V)
Wild('H', NX_H)
Wild('D', NX_D)
Wild('B', NX_B)
Wild('X', NX_X)
Wild('N', NX_N)
}
static void SetAlphaDNA()
{
Res('A', NX_A)
Res('C', NX_C)
Res('G', NX_G)
Res('T', NX_T)
Wild('M', NX_M)
Wild('R', NX_R)
Wild('W', NX_W)
Wild('S', NX_S)
Wild('Y', NX_Y)
Wild('K', NX_K)
Wild('V', NX_V)
Wild('H', NX_H)
Wild('D', NX_D)
Wild('B', NX_B)
Wild('X', NX_X)
Wild('N', NX_N)
}
static void SetAlphaRNA()
{
Res('A', NX_A)
Res('C', NX_C)
Res('G', NX_G)
Res('U', NX_U)
Res('T', NX_T)
Wild('M', NX_M)
Wild('R', NX_R)
Wild('W', NX_W)
Wild('S', NX_S)
Wild('Y', NX_Y)
Wild('K', NX_K)
Wild('V', NX_V)
Wild('H', NX_H)
Wild('D', NX_D)
Wild('B', NX_B)
Wild('X', NX_X)
Wild('N', NX_N)
}
static void SetAlphaAmino()
{
Res('A', AX_A)
Res('C', AX_C)
Res('D', AX_D)
Res('E', AX_E)
Res('F', AX_F)
Res('G', AX_G)
Res('H', AX_H)
Res('I', AX_I)
Res('K', AX_K)
Res('L', AX_L)
Res('M', AX_M)
Res('N', AX_N)
Res('P', AX_P)
Res('Q', AX_Q)
Res('R', AX_R)
Res('S', AX_S)
Res('T', AX_T)
Res('V', AX_V)
Res('W', AX_W)
Res('Y', AX_Y)
Wild('B', AX_B)
Wild('X', AX_X)
Wild('Z', AX_Z)
}
// Select the active alphabet: reset the lookup tables, register '.' and
// '-' as gap characters, fill the tables for Alpha, then publish
// g_AlphaSize and g_Alpha. Calls Die() for an unsupported alphabet.
void SetAlpha(ALPHA Alpha)
	{
	InitArrays();

	// '-' is registered last, so it is the character stored for AX_GAP.
	SetGapChar('.');
	SetGapChar('-');

	if (Alpha == ALPHA_Amino)
		{
		SetAlphaAmino();
		}
	else if (Alpha == ALPHA_Nucleo)
		{
		SetAlphaNucleo();
		}
	else
		{
		Die("Invalid Alpha=%d", Alpha);
		}

	g_AlphaSize = GetAlphaSize(Alpha);
	g_Alpha = Alpha;
	}
// Wildcard character of the active alphabet: 'X' for amino, 'N' for
// nucleotide. Calls Die() when no supported alphabet is selected.
char GetWildcardChar()
	{
	if (g_Alpha == ALPHA_Amino)
		return 'X';
	if (g_Alpha == ALPHA_Nucleo)
		return 'N';
	Die("Invalid Alpha=%d", g_Alpha);
	return '?';
	}
// True if c is one of A, C, G, T, U, R, Y, N in either case.
bool IsNucleo(char c)
	{
	// strchr() treats the terminating NUL as part of the searched string,
	// so '\0' must be rejected explicitly.
	return c != '\0' && strchr("ACGTURYNacgturyn", c) != 0;
	}
// True if c is one of A, G, C, T, N in either case.
bool IsDNA(char c)
	{
	// strchr() treats the terminating NUL as part of the searched string,
	// so '\0' must be rejected explicitly.
	return c != '\0' && strchr("AGCTNagctn", c) != 0;
	}
// True if c is one of A, G, C, U, N in either case.
bool IsRNA(char c)
	{
	// strchr() treats the terminating NUL as part of the searched string,
	// so '\0' must be rejected explicitly.
	return c != '\0' && strchr("AGCUNagcun", c) != 0;
	}
// Invalid-letter bookkeeping: one flag per byte value that was reported,
// plus the total number of reports.
static char InvalidLetters[256];
static int InvalidLetterCount = 0;

// Forget all recorded invalid letters. The counter is reset together with
// the flags; otherwise ReportInvalidLetters() would still emit a warning,
// with an empty letter list, after a clear.
void ClearInvalidLetterWarning()
	{
	memset(InvalidLetters, 0, sizeof(InvalidLetters));
	InvalidLetterCount = 0;
	}
// Record that invalid character c was seen. The second parameter is not
// used by this function.
void InvalidLetterWarning(char c, char w)
	{
	const unsigned char Index = (unsigned char) c;
	InvalidLetterCount += 1;
	InvalidLetters[Index] = 1;
	}
// Emit a single warning listing every distinct invalid character recorded
// so far, in ascending byte order. Does nothing if none were recorded.
void ReportInvalidLetters()
	{
	if (InvalidLetterCount == 0)
		return;

	// 256 possible characters plus the terminator; zero-filled so the
	// string is terminated wherever collection stops.
	char Str[257] = { 0 };
	char *Out = Str;
	for (int Code = 0; Code < 256; ++Code)
		{
		if (InvalidLetters[Code])
			*Out++ = (char) Code;
		}
	Warning("Invalid letters found: %s", Str);
	}
muscle-5.1.0/src/alpha.h 0000664 0000000 0000000 00000004133 14244530626 0015041 0 ustar 00root root 0000000 0000000 #ifndef alpha_h
#define alpha_h
// Alphabet selector passed to SetAlpha() and stored in g_Alpha.
enum ALPHA
{
// No alphabet selected yet.
ALPHA_Undefined,
// Nucleotide alphabet.
ALPHA_Nucleo,
// Amino-acid alphabet.
ALPHA_Amino
};
bool StrHasAmino(const char *Str);
bool StrHasGap(const char *Str);
void ClearInvalidLetterWarning();
void InvalidLetterWarning(char c, char w);
void ReportInvalidLetters();
extern unsigned g_CharToLetter[];
extern unsigned g_CharToLetterEx[];
extern char g_LetterToChar[];
extern char g_LetterExToChar[];
extern char g_UnalignChar[];
extern char g_AlignChar[];
extern bool g_IsWildcardChar[];
extern bool g_IsResidueChar[];
// Table-lookup helpers. The character-indexed macros cast their argument to
// unsigned char before indexing, so a negative char value cannot produce a
// negative index. IsGapChar is a direct comparison and uses no table.
#define CharToLetter(c) (g_CharToLetter[(unsigned char) (c)])
#define CharToLetterEx(c) (g_CharToLetterEx[(unsigned char) (c)])
#define LetterToChar(u) (g_LetterToChar[u])
#define LetterExToChar(u) (g_LetterExToChar[u])
#define IsResidueChar(c) (g_IsResidueChar[(unsigned char) (c)])
#define IsGapChar(c) ('-' == (c) || '.' == (c))
#define IsWildcardChar(c) (g_IsWildcardChar[(unsigned char) (c)])
#define AlignChar(c) (g_AlignChar[(unsigned char) (c)])
#define UnalignChar(c) (g_UnalignChar[(unsigned char) (c)])
// AX=Amino alphabet with eXtensions (B, Z and X)
// The 20 residue codes come first (AX_A = 0 ... AX_Y = 19), followed by the
// ambiguity codes AX_X = 20, AX_B = 21, AX_Z = 22 and finally AX_GAP = 23.
enum AX
{
AX_A,
AX_C,
AX_D,
AX_E,
AX_F,
AX_G,
AX_H,
AX_I,
AX_K,
AX_L,
AX_M,
AX_N,
AX_P,
AX_Q,
AX_R,
AX_S,
AX_T,
AX_V,
AX_W,
AX_Y,
AX_X, // Any
AX_B, // D or N
AX_Z, // E or Q
AX_GAP,
};
// Number of AX codes including the gap code (AX_GAP + 1 = 24).
const unsigned AX_COUNT = AX_GAP + 1;
// NX=Nucleotide alphabet with extensions
// The four bases come first (NX_A = 0 ... NX_T = 3); NX_U is an alias with
// the same value as NX_T. The trailing comments list the bases each
// ambiguity code stands for. NX_X and NX_N carry the same comment but are
// distinct values (14 and 15). NX_GAP = 16 is the last code.
enum NX
{
NX_A,
NX_C,
NX_G,
NX_T,
NX_U = NX_T,
NX_M, // AC
NX_R, // AG
NX_W, // AT
NX_S, // CG
NX_Y, // CT
NX_K, // GT
NX_V, // ACG
NX_H, // ACT
NX_D, // AGT
NX_B, // CGT
NX_X, // GATC
NX_N, // GATC
NX_GAP
};
// Number of NX codes including the gap code (NX_GAP + 1 = 17).
const unsigned NX_COUNT = NX_GAP + 1;
// Size limits shared by alphabet-indexed tables.
// MAX_ALPHA equals the number of amino-acid residue codes (AX_A..AX_Y).
const unsigned MAX_ALPHA = 20;
// Extended alphabet size: same as AX_COUNT (residues + X, B, Z + gap).
const unsigned MAX_ALPHA_EX = AX_COUNT;
// Number of distinct byte values; size of tables indexed by unsigned char.
const unsigned MAX_CHAR = 256;
// Currently selected alphabet and its size; both are assigned by SetAlpha().
extern ALPHA g_Alpha;
extern unsigned g_AlphaSize;
// Re-initializes the lookup tables for Alpha and updates g_Alpha and
// g_AlphaSize. Values other than ALPHA_Amino / ALPHA_Nucleo go to Die().
void SetAlpha(ALPHA Alpha);
// Returns 'X' when g_Alpha is ALPHA_Amino and 'N' when it is ALPHA_Nucleo.
char GetWildcardChar();
// Character-class tests, matching either case:
//   IsNucleo tests c against the letters ACGTURYN,
//   IsDNA    tests c against the letters AGCTN,
//   IsRNA    tests c against the letters AGCUN.
bool IsNucleo(char c);
bool IsDNA(char c);
bool IsRNA(char c);
// Return true when c is one of the two gap characters, '-' or '.'.
static inline bool isgap(char c)
{
	switch (c)
	{
	case '-':
	case '.':
		return true;
	default:
		return false;
	}
}
// Byte-indexed character-to-letter tables with 256 entries each.
// g_CharToLetterAmino is defined in alpha3.cpp.
// NOTE(review): the definition of g_CharToLetterNucleo is not in view here;
// presumably it is the nucleotide counterpart -- confirm.
extern byte g_CharToLetterNucleo[256];
extern byte g_CharToLetterAmino[256];
#endif // alpha_h
muscle-5.1.0/src/alpha3.cpp 0000664 0000000 0000000 00000403234 14244530626 0015464 0 ustar 00root root 0000000 0000000 // Generated by /e/r/py/alphac.py
#include "myutils.h"
#include "alpha3.h"
// The 20 amino-acid residue characters in letter-code order: the entry at
// index i is the character that the g_CharToLetterAmino* tables map to i.
byte g_AminoAcidChars[20] =
{
'A','C','D','E','F','G','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y',
};
// Byte value -> amino-acid letter code, with a code for the stop symbol.
// Upper- and lower-case forms of the 20 residue letters map to 0..19 (same
// order as g_AminoAcidChars), '*' maps to 20, and every other byte maps to
// INVALID_LETTER.
byte g_CharToLetterAminoStop[256] =
{
INVALID_LETTER, // [ 0] 0x00
INVALID_LETTER, // [ 1] 0x01
INVALID_LETTER, // [ 2] 0x02
INVALID_LETTER, // [ 3] 0x03
INVALID_LETTER, // [ 4] 0x04
INVALID_LETTER, // [ 5] 0x05
INVALID_LETTER, // [ 6] 0x06
INVALID_LETTER, // [ 7] 0x07
INVALID_LETTER, // [ 8] 0x08
INVALID_LETTER, // [ 9] 0x09
INVALID_LETTER, // [ 10] 0x0a
INVALID_LETTER, // [ 11] 0x0b
INVALID_LETTER, // [ 12] 0x0c
INVALID_LETTER, // [ 13] 0x0d
INVALID_LETTER, // [ 14] 0x0e
INVALID_LETTER, // [ 15] 0x0f
INVALID_LETTER, // [ 16] 0x10
INVALID_LETTER, // [ 17] 0x11
INVALID_LETTER, // [ 18] 0x12
INVALID_LETTER, // [ 19] 0x13
INVALID_LETTER, // [ 20] 0x14
INVALID_LETTER, // [ 21] 0x15
INVALID_LETTER, // [ 22] 0x16
INVALID_LETTER, // [ 23] 0x17
INVALID_LETTER, // [ 24] 0x18
INVALID_LETTER, // [ 25] 0x19
INVALID_LETTER, // [ 26] 0x1a
INVALID_LETTER, // [ 27] 0x1b
INVALID_LETTER, // [ 28] 0x1c
INVALID_LETTER, // [ 29] 0x1d
INVALID_LETTER, // [ 30] 0x1e
INVALID_LETTER, // [ 31] 0x1f
INVALID_LETTER, // [ 32] ' '
INVALID_LETTER, // [ 33] '!'
INVALID_LETTER, // [ 34] '"'
INVALID_LETTER, // [ 35] '#'
INVALID_LETTER, // [ 36] '$'
INVALID_LETTER, // [ 37] '%'
INVALID_LETTER, // [ 38] '&'
INVALID_LETTER, // [ 39] '''
INVALID_LETTER, // [ 40] '('
INVALID_LETTER, // [ 41] ')'
20 , // [ 42] '*' = STP
INVALID_LETTER, // [ 43] '+'
INVALID_LETTER, // [ 44] ','
INVALID_LETTER, // [ 45] '-'
INVALID_LETTER, // [ 46] '.'
INVALID_LETTER, // [ 47] '/'
INVALID_LETTER, // [ 48] '0'
INVALID_LETTER, // [ 49] '1'
INVALID_LETTER, // [ 50] '2'
INVALID_LETTER, // [ 51] '3'
INVALID_LETTER, // [ 52] '4'
INVALID_LETTER, // [ 53] '5'
INVALID_LETTER, // [ 54] '6'
INVALID_LETTER, // [ 55] '7'
INVALID_LETTER, // [ 56] '8'
INVALID_LETTER, // [ 57] '9'
INVALID_LETTER, // [ 58] ':'
INVALID_LETTER, // [ 59] ';'
INVALID_LETTER, // [ 60] '<'
INVALID_LETTER, // [ 61] '='
INVALID_LETTER, // [ 62] '>'
INVALID_LETTER, // [ 63] '?'
INVALID_LETTER, // [ 64] '@'
0 , // [ 65] 'A' = Ala
INVALID_LETTER, // [ 66] 'B'
1 , // [ 67] 'C' = Cys
2 , // [ 68] 'D' = Asp
3 , // [ 69] 'E' = Glu
4 , // [ 70] 'F' = Phe
5 , // [ 71] 'G' = Gly
6 , // [ 72] 'H' = His
7 , // [ 73] 'I' = Ile
INVALID_LETTER, // [ 74] 'J'
8 , // [ 75] 'K' = Lys
9 , // [ 76] 'L' = Leu
10 , // [ 77] 'M' = Met
11 , // [ 78] 'N' = Asn
INVALID_LETTER, // [ 79] 'O'
12 , // [ 80] 'P' = Pro
13 , // [ 81] 'Q' = Gln
14 , // [ 82] 'R' = Arg
15 , // [ 83] 'S' = Ser
16 , // [ 84] 'T' = Thr
INVALID_LETTER, // [ 85] 'U'
17 , // [ 86] 'V' = Val
18 , // [ 87] 'W' = Trp
INVALID_LETTER, // [ 88] 'X'
19 , // [ 89] 'Y' = Tyr
INVALID_LETTER, // [ 90] 'Z'
INVALID_LETTER, // [ 91] '['
INVALID_LETTER, // [ 92] '\'
INVALID_LETTER, // [ 93] ']'
INVALID_LETTER, // [ 94] '^'
INVALID_LETTER, // [ 95] '_'
INVALID_LETTER, // [ 96] '`'
0 , // [ 97] 'a' = Ala
INVALID_LETTER, // [ 98] 'b'
1 , // [ 99] 'c' = Cys
2 , // [100] 'd' = Asp
3 , // [101] 'e' = Glu
4 , // [102] 'f' = Phe
5 , // [103] 'g' = Gly
6 , // [104] 'h' = His
7 , // [105] 'i' = Ile
INVALID_LETTER, // [106] 'j'
8 , // [107] 'k' = Lys
9 , // [108] 'l' = Leu
10 , // [109] 'm' = Met
11 , // [110] 'n' = Asn
INVALID_LETTER, // [111] 'o'
12 , // [112] 'p' = Pro
13 , // [113] 'q' = Gln
14 , // [114] 'r' = Arg
15 , // [115] 's' = Ser
16 , // [116] 't' = Thr
INVALID_LETTER, // [117] 'u'
17 , // [118] 'v' = Val
18 , // [119] 'w' = Trp
INVALID_LETTER, // [120] 'x'
19 , // [121] 'y' = Tyr
INVALID_LETTER, // [122] 'z'
INVALID_LETTER, // [123] '{'
INVALID_LETTER, // [124] '|'
INVALID_LETTER, // [125] '}'
INVALID_LETTER, // [126] '~'
INVALID_LETTER, // [127] 0x7f
INVALID_LETTER, // [128] 0x80
INVALID_LETTER, // [129] 0x81
INVALID_LETTER, // [130] 0x82
INVALID_LETTER, // [131] 0x83
INVALID_LETTER, // [132] 0x84
INVALID_LETTER, // [133] 0x85
INVALID_LETTER, // [134] 0x86
INVALID_LETTER, // [135] 0x87
INVALID_LETTER, // [136] 0x88
INVALID_LETTER, // [137] 0x89
INVALID_LETTER, // [138] 0x8a
INVALID_LETTER, // [139] 0x8b
INVALID_LETTER, // [140] 0x8c
INVALID_LETTER, // [141] 0x8d
INVALID_LETTER, // [142] 0x8e
INVALID_LETTER, // [143] 0x8f
INVALID_LETTER, // [144] 0x90
INVALID_LETTER, // [145] 0x91
INVALID_LETTER, // [146] 0x92
INVALID_LETTER, // [147] 0x93
INVALID_LETTER, // [148] 0x94
INVALID_LETTER, // [149] 0x95
INVALID_LETTER, // [150] 0x96
INVALID_LETTER, // [151] 0x97
INVALID_LETTER, // [152] 0x98
INVALID_LETTER, // [153] 0x99
INVALID_LETTER, // [154] 0x9a
INVALID_LETTER, // [155] 0x9b
INVALID_LETTER, // [156] 0x9c
INVALID_LETTER, // [157] 0x9d
INVALID_LETTER, // [158] 0x9e
INVALID_LETTER, // [159] 0x9f
INVALID_LETTER, // [160] 0xa0
INVALID_LETTER, // [161] 0xa1
INVALID_LETTER, // [162] 0xa2
INVALID_LETTER, // [163] 0xa3
INVALID_LETTER, // [164] 0xa4
INVALID_LETTER, // [165] 0xa5
INVALID_LETTER, // [166] 0xa6
INVALID_LETTER, // [167] 0xa7
INVALID_LETTER, // [168] 0xa8
INVALID_LETTER, // [169] 0xa9
INVALID_LETTER, // [170] 0xaa
INVALID_LETTER, // [171] 0xab
INVALID_LETTER, // [172] 0xac
INVALID_LETTER, // [173] 0xad
INVALID_LETTER, // [174] 0xae
INVALID_LETTER, // [175] 0xaf
INVALID_LETTER, // [176] 0xb0
INVALID_LETTER, // [177] 0xb1
INVALID_LETTER, // [178] 0xb2
INVALID_LETTER, // [179] 0xb3
INVALID_LETTER, // [180] 0xb4
INVALID_LETTER, // [181] 0xb5
INVALID_LETTER, // [182] 0xb6
INVALID_LETTER, // [183] 0xb7
INVALID_LETTER, // [184] 0xb8
INVALID_LETTER, // [185] 0xb9
INVALID_LETTER, // [186] 0xba
INVALID_LETTER, // [187] 0xbb
INVALID_LETTER, // [188] 0xbc
INVALID_LETTER, // [189] 0xbd
INVALID_LETTER, // [190] 0xbe
INVALID_LETTER, // [191] 0xbf
INVALID_LETTER, // [192] 0xc0
INVALID_LETTER, // [193] 0xc1
INVALID_LETTER, // [194] 0xc2
INVALID_LETTER, // [195] 0xc3
INVALID_LETTER, // [196] 0xc4
INVALID_LETTER, // [197] 0xc5
INVALID_LETTER, // [198] 0xc6
INVALID_LETTER, // [199] 0xc7
INVALID_LETTER, // [200] 0xc8
INVALID_LETTER, // [201] 0xc9
INVALID_LETTER, // [202] 0xca
INVALID_LETTER, // [203] 0xcb
INVALID_LETTER, // [204] 0xcc
INVALID_LETTER, // [205] 0xcd
INVALID_LETTER, // [206] 0xce
INVALID_LETTER, // [207] 0xcf
INVALID_LETTER, // [208] 0xd0
INVALID_LETTER, // [209] 0xd1
INVALID_LETTER, // [210] 0xd2
INVALID_LETTER, // [211] 0xd3
INVALID_LETTER, // [212] 0xd4
INVALID_LETTER, // [213] 0xd5
INVALID_LETTER, // [214] 0xd6
INVALID_LETTER, // [215] 0xd7
INVALID_LETTER, // [216] 0xd8
INVALID_LETTER, // [217] 0xd9
INVALID_LETTER, // [218] 0xda
INVALID_LETTER, // [219] 0xdb
INVALID_LETTER, // [220] 0xdc
INVALID_LETTER, // [221] 0xdd
INVALID_LETTER, // [222] 0xde
INVALID_LETTER, // [223] 0xdf
INVALID_LETTER, // [224] 0xe0
INVALID_LETTER, // [225] 0xe1
INVALID_LETTER, // [226] 0xe2
INVALID_LETTER, // [227] 0xe3
INVALID_LETTER, // [228] 0xe4
INVALID_LETTER, // [229] 0xe5
INVALID_LETTER, // [230] 0xe6
INVALID_LETTER, // [231] 0xe7
INVALID_LETTER, // [232] 0xe8
INVALID_LETTER, // [233] 0xe9
INVALID_LETTER, // [234] 0xea
INVALID_LETTER, // [235] 0xeb
INVALID_LETTER, // [236] 0xec
INVALID_LETTER, // [237] 0xed
INVALID_LETTER, // [238] 0xee
INVALID_LETTER, // [239] 0xef
INVALID_LETTER, // [240] 0xf0
INVALID_LETTER, // [241] 0xf1
INVALID_LETTER, // [242] 0xf2
INVALID_LETTER, // [243] 0xf3
INVALID_LETTER, // [244] 0xf4
INVALID_LETTER, // [245] 0xf5
INVALID_LETTER, // [246] 0xf6
INVALID_LETTER, // [247] 0xf7
INVALID_LETTER, // [248] 0xf8
INVALID_LETTER, // [249] 0xf9
INVALID_LETTER, // [250] 0xfa
INVALID_LETTER, // [251] 0xfb
INVALID_LETTER, // [252] 0xfc
INVALID_LETTER, // [253] 0xfd
INVALID_LETTER, // [254] 0xfe
INVALID_LETTER, // [255] 0xff
};
// Byte value -> amino-acid letter code, with a code for the gap character.
// Upper- and lower-case forms of the 20 residue letters map to 0..19 (same
// order as g_AminoAcidChars), '-' maps to 20, and '.' maps to INVALID_LETTER.
// NOTE(review): '*' is also mapped to 20 in this table, so a stop symbol and
// a gap cannot be told apart after conversion. This file is generated, so
// check the generator to confirm whether the '*' entry is intended.
byte g_CharToLetterAminoGap[256] =
{
INVALID_LETTER, // [ 0] 0x00
INVALID_LETTER, // [ 1] 0x01
INVALID_LETTER, // [ 2] 0x02
INVALID_LETTER, // [ 3] 0x03
INVALID_LETTER, // [ 4] 0x04
INVALID_LETTER, // [ 5] 0x05
INVALID_LETTER, // [ 6] 0x06
INVALID_LETTER, // [ 7] 0x07
INVALID_LETTER, // [ 8] 0x08
INVALID_LETTER, // [ 9] 0x09
INVALID_LETTER, // [ 10] 0x0a
INVALID_LETTER, // [ 11] 0x0b
INVALID_LETTER, // [ 12] 0x0c
INVALID_LETTER, // [ 13] 0x0d
INVALID_LETTER, // [ 14] 0x0e
INVALID_LETTER, // [ 15] 0x0f
INVALID_LETTER, // [ 16] 0x10
INVALID_LETTER, // [ 17] 0x11
INVALID_LETTER, // [ 18] 0x12
INVALID_LETTER, // [ 19] 0x13
INVALID_LETTER, // [ 20] 0x14
INVALID_LETTER, // [ 21] 0x15
INVALID_LETTER, // [ 22] 0x16
INVALID_LETTER, // [ 23] 0x17
INVALID_LETTER, // [ 24] 0x18
INVALID_LETTER, // [ 25] 0x19
INVALID_LETTER, // [ 26] 0x1a
INVALID_LETTER, // [ 27] 0x1b
INVALID_LETTER, // [ 28] 0x1c
INVALID_LETTER, // [ 29] 0x1d
INVALID_LETTER, // [ 30] 0x1e
INVALID_LETTER, // [ 31] 0x1f
INVALID_LETTER, // [ 32] ' '
INVALID_LETTER, // [ 33] '!'
INVALID_LETTER, // [ 34] '"'
INVALID_LETTER, // [ 35] '#'
INVALID_LETTER, // [ 36] '$'
INVALID_LETTER, // [ 37] '%'
INVALID_LETTER, // [ 38] '&'
INVALID_LETTER, // [ 39] '''
INVALID_LETTER, // [ 40] '('
INVALID_LETTER, // [ 41] ')'
20 , // [ 42] '*' = STP
INVALID_LETTER, // [ 43] '+'
INVALID_LETTER, // [ 44] ','
20 , // [ 45] '-' gap
INVALID_LETTER, // [ 46] '.'
INVALID_LETTER, // [ 47] '/'
INVALID_LETTER, // [ 48] '0'
INVALID_LETTER, // [ 49] '1'
INVALID_LETTER, // [ 50] '2'
INVALID_LETTER, // [ 51] '3'
INVALID_LETTER, // [ 52] '4'
INVALID_LETTER, // [ 53] '5'
INVALID_LETTER, // [ 54] '6'
INVALID_LETTER, // [ 55] '7'
INVALID_LETTER, // [ 56] '8'
INVALID_LETTER, // [ 57] '9'
INVALID_LETTER, // [ 58] ':'
INVALID_LETTER, // [ 59] ';'
INVALID_LETTER, // [ 60] '<'
INVALID_LETTER, // [ 61] '='
INVALID_LETTER, // [ 62] '>'
INVALID_LETTER, // [ 63] '?'
INVALID_LETTER, // [ 64] '@'
0 , // [ 65] 'A' = Ala
INVALID_LETTER, // [ 66] 'B'
1 , // [ 67] 'C' = Cys
2 , // [ 68] 'D' = Asp
3 , // [ 69] 'E' = Glu
4 , // [ 70] 'F' = Phe
5 , // [ 71] 'G' = Gly
6 , // [ 72] 'H' = His
7 , // [ 73] 'I' = Ile
INVALID_LETTER, // [ 74] 'J'
8 , // [ 75] 'K' = Lys
9 , // [ 76] 'L' = Leu
10 , // [ 77] 'M' = Met
11 , // [ 78] 'N' = Asn
INVALID_LETTER, // [ 79] 'O'
12 , // [ 80] 'P' = Pro
13 , // [ 81] 'Q' = Gln
14 , // [ 82] 'R' = Arg
15 , // [ 83] 'S' = Ser
16 , // [ 84] 'T' = Thr
INVALID_LETTER, // [ 85] 'U'
17 , // [ 86] 'V' = Val
18 , // [ 87] 'W' = Trp
INVALID_LETTER, // [ 88] 'X'
19 , // [ 89] 'Y' = Tyr
INVALID_LETTER, // [ 90] 'Z'
INVALID_LETTER, // [ 91] '['
INVALID_LETTER, // [ 92] '\'
INVALID_LETTER, // [ 93] ']'
INVALID_LETTER, // [ 94] '^'
INVALID_LETTER, // [ 95] '_'
INVALID_LETTER, // [ 96] '`'
0 , // [ 97] 'a' = Ala
INVALID_LETTER, // [ 98] 'b'
1 , // [ 99] 'c' = Cys
2 , // [100] 'd' = Asp
3 , // [101] 'e' = Glu
4 , // [102] 'f' = Phe
5 , // [103] 'g' = Gly
6 , // [104] 'h' = His
7 , // [105] 'i' = Ile
INVALID_LETTER, // [106] 'j'
8 , // [107] 'k' = Lys
9 , // [108] 'l' = Leu
10 , // [109] 'm' = Met
11 , // [110] 'n' = Asn
INVALID_LETTER, // [111] 'o'
12 , // [112] 'p' = Pro
13 , // [113] 'q' = Gln
14 , // [114] 'r' = Arg
15 , // [115] 's' = Ser
16 , // [116] 't' = Thr
INVALID_LETTER, // [117] 'u'
17 , // [118] 'v' = Val
18 , // [119] 'w' = Trp
INVALID_LETTER, // [120] 'x'
19 , // [121] 'y' = Tyr
INVALID_LETTER, // [122] 'z'
INVALID_LETTER, // [123] '{'
INVALID_LETTER, // [124] '|'
INVALID_LETTER, // [125] '}'
INVALID_LETTER, // [126] '~'
INVALID_LETTER, // [127] 0x7f
INVALID_LETTER, // [128] 0x80
INVALID_LETTER, // [129] 0x81
INVALID_LETTER, // [130] 0x82
INVALID_LETTER, // [131] 0x83
INVALID_LETTER, // [132] 0x84
INVALID_LETTER, // [133] 0x85
INVALID_LETTER, // [134] 0x86
INVALID_LETTER, // [135] 0x87
INVALID_LETTER, // [136] 0x88
INVALID_LETTER, // [137] 0x89
INVALID_LETTER, // [138] 0x8a
INVALID_LETTER, // [139] 0x8b
INVALID_LETTER, // [140] 0x8c
INVALID_LETTER, // [141] 0x8d
INVALID_LETTER, // [142] 0x8e
INVALID_LETTER, // [143] 0x8f
INVALID_LETTER, // [144] 0x90
INVALID_LETTER, // [145] 0x91
INVALID_LETTER, // [146] 0x92
INVALID_LETTER, // [147] 0x93
INVALID_LETTER, // [148] 0x94
INVALID_LETTER, // [149] 0x95
INVALID_LETTER, // [150] 0x96
INVALID_LETTER, // [151] 0x97
INVALID_LETTER, // [152] 0x98
INVALID_LETTER, // [153] 0x99
INVALID_LETTER, // [154] 0x9a
INVALID_LETTER, // [155] 0x9b
INVALID_LETTER, // [156] 0x9c
INVALID_LETTER, // [157] 0x9d
INVALID_LETTER, // [158] 0x9e
INVALID_LETTER, // [159] 0x9f
INVALID_LETTER, // [160] 0xa0
INVALID_LETTER, // [161] 0xa1
INVALID_LETTER, // [162] 0xa2
INVALID_LETTER, // [163] 0xa3
INVALID_LETTER, // [164] 0xa4
INVALID_LETTER, // [165] 0xa5
INVALID_LETTER, // [166] 0xa6
INVALID_LETTER, // [167] 0xa7
INVALID_LETTER, // [168] 0xa8
INVALID_LETTER, // [169] 0xa9
INVALID_LETTER, // [170] 0xaa
INVALID_LETTER, // [171] 0xab
INVALID_LETTER, // [172] 0xac
INVALID_LETTER, // [173] 0xad
INVALID_LETTER, // [174] 0xae
INVALID_LETTER, // [175] 0xaf
INVALID_LETTER, // [176] 0xb0
INVALID_LETTER, // [177] 0xb1
INVALID_LETTER, // [178] 0xb2
INVALID_LETTER, // [179] 0xb3
INVALID_LETTER, // [180] 0xb4
INVALID_LETTER, // [181] 0xb5
INVALID_LETTER, // [182] 0xb6
INVALID_LETTER, // [183] 0xb7
INVALID_LETTER, // [184] 0xb8
INVALID_LETTER, // [185] 0xb9
INVALID_LETTER, // [186] 0xba
INVALID_LETTER, // [187] 0xbb
INVALID_LETTER, // [188] 0xbc
INVALID_LETTER, // [189] 0xbd
INVALID_LETTER, // [190] 0xbe
INVALID_LETTER, // [191] 0xbf
INVALID_LETTER, // [192] 0xc0
INVALID_LETTER, // [193] 0xc1
INVALID_LETTER, // [194] 0xc2
INVALID_LETTER, // [195] 0xc3
INVALID_LETTER, // [196] 0xc4
INVALID_LETTER, // [197] 0xc5
INVALID_LETTER, // [198] 0xc6
INVALID_LETTER, // [199] 0xc7
INVALID_LETTER, // [200] 0xc8
INVALID_LETTER, // [201] 0xc9
INVALID_LETTER, // [202] 0xca
INVALID_LETTER, // [203] 0xcb
INVALID_LETTER, // [204] 0xcc
INVALID_LETTER, // [205] 0xcd
INVALID_LETTER, // [206] 0xce
INVALID_LETTER, // [207] 0xcf
INVALID_LETTER, // [208] 0xd0
INVALID_LETTER, // [209] 0xd1
INVALID_LETTER, // [210] 0xd2
INVALID_LETTER, // [211] 0xd3
INVALID_LETTER, // [212] 0xd4
INVALID_LETTER, // [213] 0xd5
INVALID_LETTER, // [214] 0xd6
INVALID_LETTER, // [215] 0xd7
INVALID_LETTER, // [216] 0xd8
INVALID_LETTER, // [217] 0xd9
INVALID_LETTER, // [218] 0xda
INVALID_LETTER, // [219] 0xdb
INVALID_LETTER, // [220] 0xdc
INVALID_LETTER, // [221] 0xdd
INVALID_LETTER, // [222] 0xde
INVALID_LETTER, // [223] 0xdf
INVALID_LETTER, // [224] 0xe0
INVALID_LETTER, // [225] 0xe1
INVALID_LETTER, // [226] 0xe2
INVALID_LETTER, // [227] 0xe3
INVALID_LETTER, // [228] 0xe4
INVALID_LETTER, // [229] 0xe5
INVALID_LETTER, // [230] 0xe6
INVALID_LETTER, // [231] 0xe7
INVALID_LETTER, // [232] 0xe8
INVALID_LETTER, // [233] 0xe9
INVALID_LETTER, // [234] 0xea
INVALID_LETTER, // [235] 0xeb
INVALID_LETTER, // [236] 0xec
INVALID_LETTER, // [237] 0xed
INVALID_LETTER, // [238] 0xee
INVALID_LETTER, // [239] 0xef
INVALID_LETTER, // [240] 0xf0
INVALID_LETTER, // [241] 0xf1
INVALID_LETTER, // [242] 0xf2
INVALID_LETTER, // [243] 0xf3
INVALID_LETTER, // [244] 0xf4
INVALID_LETTER, // [245] 0xf5
INVALID_LETTER, // [246] 0xf6
INVALID_LETTER, // [247] 0xf7
INVALID_LETTER, // [248] 0xf8
INVALID_LETTER, // [249] 0xf9
INVALID_LETTER, // [250] 0xfa
INVALID_LETTER, // [251] 0xfb
INVALID_LETTER, // [252] 0xfc
INVALID_LETTER, // [253] 0xfd
INVALID_LETTER, // [254] 0xfe
INVALID_LETTER, // [255] 0xff
};
// Byte value -> amino-acid letter code, residues only.
// Upper- and lower-case forms of the 20 residue letters map to 0..19 (same
// order as g_AminoAcidChars); every other byte, including '*', '-' and '.',
// maps to INVALID_LETTER.
byte g_CharToLetterAmino[256] =
{
INVALID_LETTER, // [ 0] 0x00
INVALID_LETTER, // [ 1] 0x01
INVALID_LETTER, // [ 2] 0x02
INVALID_LETTER, // [ 3] 0x03
INVALID_LETTER, // [ 4] 0x04
INVALID_LETTER, // [ 5] 0x05
INVALID_LETTER, // [ 6] 0x06
INVALID_LETTER, // [ 7] 0x07
INVALID_LETTER, // [ 8] 0x08
INVALID_LETTER, // [ 9] 0x09
INVALID_LETTER, // [ 10] 0x0a
INVALID_LETTER, // [ 11] 0x0b
INVALID_LETTER, // [ 12] 0x0c
INVALID_LETTER, // [ 13] 0x0d
INVALID_LETTER, // [ 14] 0x0e
INVALID_LETTER, // [ 15] 0x0f
INVALID_LETTER, // [ 16] 0x10
INVALID_LETTER, // [ 17] 0x11
INVALID_LETTER, // [ 18] 0x12
INVALID_LETTER, // [ 19] 0x13
INVALID_LETTER, // [ 20] 0x14
INVALID_LETTER, // [ 21] 0x15
INVALID_LETTER, // [ 22] 0x16
INVALID_LETTER, // [ 23] 0x17
INVALID_LETTER, // [ 24] 0x18
INVALID_LETTER, // [ 25] 0x19
INVALID_LETTER, // [ 26] 0x1a
INVALID_LETTER, // [ 27] 0x1b
INVALID_LETTER, // [ 28] 0x1c
INVALID_LETTER, // [ 29] 0x1d
INVALID_LETTER, // [ 30] 0x1e
INVALID_LETTER, // [ 31] 0x1f
INVALID_LETTER, // [ 32] ' '
INVALID_LETTER, // [ 33] '!'
INVALID_LETTER, // [ 34] '"'
INVALID_LETTER, // [ 35] '#'
INVALID_LETTER, // [ 36] '$'
INVALID_LETTER, // [ 37] '%'
INVALID_LETTER, // [ 38] '&'
INVALID_LETTER, // [ 39] '''
INVALID_LETTER, // [ 40] '('
INVALID_LETTER, // [ 41] ')'
INVALID_LETTER, // [ 42] '*'
INVALID_LETTER, // [ 43] '+'
INVALID_LETTER, // [ 44] ','
INVALID_LETTER, // [ 45] '-'
INVALID_LETTER, // [ 46] '.'
INVALID_LETTER, // [ 47] '/'
INVALID_LETTER, // [ 48] '0'
INVALID_LETTER, // [ 49] '1'
INVALID_LETTER, // [ 50] '2'
INVALID_LETTER, // [ 51] '3'
INVALID_LETTER, // [ 52] '4'
INVALID_LETTER, // [ 53] '5'
INVALID_LETTER, // [ 54] '6'
INVALID_LETTER, // [ 55] '7'
INVALID_LETTER, // [ 56] '8'
INVALID_LETTER, // [ 57] '9'
INVALID_LETTER, // [ 58] ':'
INVALID_LETTER, // [ 59] ';'
INVALID_LETTER, // [ 60] '<'
INVALID_LETTER, // [ 61] '='
INVALID_LETTER, // [ 62] '>'
INVALID_LETTER, // [ 63] '?'
INVALID_LETTER, // [ 64] '@'
0 , // [ 65] 'A' = Ala
INVALID_LETTER, // [ 66] 'B'
1 , // [ 67] 'C' = Cys
2 , // [ 68] 'D' = Asp
3 , // [ 69] 'E' = Glu
4 , // [ 70] 'F' = Phe
5 , // [ 71] 'G' = Gly
6 , // [ 72] 'H' = His
7 , // [ 73] 'I' = Ile
INVALID_LETTER, // [ 74] 'J'
8 , // [ 75] 'K' = Lys
9 , // [ 76] 'L' = Leu
10 , // [ 77] 'M' = Met
11 , // [ 78] 'N' = Asn
INVALID_LETTER, // [ 79] 'O'
12 , // [ 80] 'P' = Pro
13 , // [ 81] 'Q' = Gln
14 , // [ 82] 'R' = Arg
15 , // [ 83] 'S' = Ser
16 , // [ 84] 'T' = Thr
INVALID_LETTER, // [ 85] 'U'
17 , // [ 86] 'V' = Val
18 , // [ 87] 'W' = Trp
INVALID_LETTER, // [ 88] 'X'
19 , // [ 89] 'Y' = Tyr
INVALID_LETTER, // [ 90] 'Z'
INVALID_LETTER, // [ 91] '['
INVALID_LETTER, // [ 92] '\'
INVALID_LETTER, // [ 93] ']'
INVALID_LETTER, // [ 94] '^'
INVALID_LETTER, // [ 95] '_'
INVALID_LETTER, // [ 96] '`'
0 , // [ 97] 'a' = Ala
INVALID_LETTER, // [ 98] 'b'
1 , // [ 99] 'c' = Cys
2 , // [100] 'd' = Asp
3 , // [101] 'e' = Glu
4 , // [102] 'f' = Phe
5 , // [103] 'g' = Gly
6 , // [104] 'h' = His
7 , // [105] 'i' = Ile
INVALID_LETTER, // [106] 'j'
8 , // [107] 'k' = Lys
9 , // [108] 'l' = Leu
10 , // [109] 'm' = Met
11 , // [110] 'n' = Asn
INVALID_LETTER, // [111] 'o'
12 , // [112] 'p' = Pro
13 , // [113] 'q' = Gln
14 , // [114] 'r' = Arg
15 , // [115] 's' = Ser
16 , // [116] 't' = Thr
INVALID_LETTER, // [117] 'u'
17 , // [118] 'v' = Val
18 , // [119] 'w' = Trp
INVALID_LETTER, // [120] 'x'
19 , // [121] 'y' = Tyr
INVALID_LETTER, // [122] 'z'
INVALID_LETTER, // [123] '{'
INVALID_LETTER, // [124] '|'
INVALID_LETTER, // [125] '}'
INVALID_LETTER, // [126] '~'
INVALID_LETTER, // [127] 0x7f
INVALID_LETTER, // [128] 0x80
INVALID_LETTER, // [129] 0x81
INVALID_LETTER, // [130] 0x82
INVALID_LETTER, // [131] 0x83
INVALID_LETTER, // [132] 0x84
INVALID_LETTER, // [133] 0x85
INVALID_LETTER, // [134] 0x86
INVALID_LETTER, // [135] 0x87
INVALID_LETTER, // [136] 0x88
INVALID_LETTER, // [137] 0x89
INVALID_LETTER, // [138] 0x8a
INVALID_LETTER, // [139] 0x8b
INVALID_LETTER, // [140] 0x8c
INVALID_LETTER, // [141] 0x8d
INVALID_LETTER, // [142] 0x8e
INVALID_LETTER, // [143] 0x8f
INVALID_LETTER, // [144] 0x90
INVALID_LETTER, // [145] 0x91
INVALID_LETTER, // [146] 0x92
INVALID_LETTER, // [147] 0x93
INVALID_LETTER, // [148] 0x94
INVALID_LETTER, // [149] 0x95
INVALID_LETTER, // [150] 0x96
INVALID_LETTER, // [151] 0x97
INVALID_LETTER, // [152] 0x98
INVALID_LETTER, // [153] 0x99
INVALID_LETTER, // [154] 0x9a
INVALID_LETTER, // [155] 0x9b
INVALID_LETTER, // [156] 0x9c
INVALID_LETTER, // [157] 0x9d
INVALID_LETTER, // [158] 0x9e
INVALID_LETTER, // [159] 0x9f
INVALID_LETTER, // [160] 0xa0
INVALID_LETTER, // [161] 0xa1
INVALID_LETTER, // [162] 0xa2
INVALID_LETTER, // [163] 0xa3
INVALID_LETTER, // [164] 0xa4
INVALID_LETTER, // [165] 0xa5
INVALID_LETTER, // [166] 0xa6
INVALID_LETTER, // [167] 0xa7
INVALID_LETTER, // [168] 0xa8
INVALID_LETTER, // [169] 0xa9
INVALID_LETTER, // [170] 0xaa
INVALID_LETTER, // [171] 0xab
INVALID_LETTER, // [172] 0xac
INVALID_LETTER, // [173] 0xad
INVALID_LETTER, // [174] 0xae
INVALID_LETTER, // [175] 0xaf
INVALID_LETTER, // [176] 0xb0
INVALID_LETTER, // [177] 0xb1
INVALID_LETTER, // [178] 0xb2
INVALID_LETTER, // [179] 0xb3
INVALID_LETTER, // [180] 0xb4
INVALID_LETTER, // [181] 0xb5
INVALID_LETTER, // [182] 0xb6
INVALID_LETTER, // [183] 0xb7
INVALID_LETTER, // [184] 0xb8
INVALID_LETTER, // [185] 0xb9
INVALID_LETTER, // [186] 0xba
INVALID_LETTER, // [187] 0xbb
INVALID_LETTER, // [188] 0xbc
INVALID_LETTER, // [189] 0xbd
INVALID_LETTER, // [190] 0xbe
INVALID_LETTER, // [191] 0xbf
INVALID_LETTER, // [192] 0xc0
INVALID_LETTER, // [193] 0xc1
INVALID_LETTER, // [194] 0xc2
INVALID_LETTER, // [195] 0xc3
INVALID_LETTER, // [196] 0xc4
INVALID_LETTER, // [197] 0xc5
INVALID_LETTER, // [198] 0xc6
INVALID_LETTER, // [199] 0xc7
INVALID_LETTER, // [200] 0xc8
INVALID_LETTER, // [201] 0xc9
INVALID_LETTER, // [202] 0xca
INVALID_LETTER, // [203] 0xcb
INVALID_LETTER, // [204] 0xcc
INVALID_LETTER, // [205] 0xcd
INVALID_LETTER, // [206] 0xce
INVALID_LETTER, // [207] 0xcf
INVALID_LETTER, // [208] 0xd0
INVALID_LETTER, // [209] 0xd1
INVALID_LETTER, // [210] 0xd2
INVALID_LETTER, // [211] 0xd3
INVALID_LETTER, // [212] 0xd4
INVALID_LETTER, // [213] 0xd5
INVALID_LETTER, // [214] 0xd6
INVALID_LETTER, // [215] 0xd7
INVALID_LETTER, // [216] 0xd8
INVALID_LETTER, // [217] 0xd9
INVALID_LETTER, // [218] 0xda
INVALID_LETTER, // [219] 0xdb
INVALID_LETTER, // [220] 0xdc
INVALID_LETTER, // [221] 0xdd
INVALID_LETTER, // [222] 0xde
INVALID_LETTER, // [223] 0xdf
INVALID_LETTER, // [224] 0xe0
INVALID_LETTER, // [225] 0xe1
INVALID_LETTER, // [226] 0xe2
INVALID_LETTER, // [227] 0xe3
INVALID_LETTER, // [228] 0xe4
INVALID_LETTER, // [229] 0xe5
INVALID_LETTER, // [230] 0xe6
INVALID_LETTER, // [231] 0xe7
INVALID_LETTER, // [232] 0xe8
INVALID_LETTER, // [233] 0xe9
INVALID_LETTER, // [234] 0xea
INVALID_LETTER, // [235] 0xeb
INVALID_LETTER, // [236] 0xec
INVALID_LETTER, // [237] 0xed
INVALID_LETTER, // [238] 0xee
INVALID_LETTER, // [239] 0xef
INVALID_LETTER, // [240] 0xf0
INVALID_LETTER, // [241] 0xf1
INVALID_LETTER, // [242] 0xf2
INVALID_LETTER, // [243] 0xf3
INVALID_LETTER, // [244] 0xf4
INVALID_LETTER, // [245] 0xf5
INVALID_LETTER, // [246] 0xf6
INVALID_LETTER, // [247] 0xf7
INVALID_LETTER, // [248] 0xf8
INVALID_LETTER, // [249] 0xf9
INVALID_LETTER, // [250] 0xfa
INVALID_LETTER, // [251] 0xfb
INVALID_LETTER, // [252] 0xfc
INVALID_LETTER, // [253] 0xfd
INVALID_LETTER, // [254] 0xfe
INVALID_LETTER, // [255] 0xff
};
// Amino-acid letter code -> character (inverse of g_CharToLetterAminoStop).
// Codes 0..19 give the upper-case residue letters, code 20 gives '*', and
// codes 21..255 give INVALID_CHAR.
byte g_LetterToCharAmino[256] =
{
'A', // [0]
'C', // [1]
'D', // [2]
'E', // [3]
'F', // [4]
'G', // [5]
'H', // [6]
'I', // [7]
'K', // [8]
'L', // [9]
'M', // [10]
'N', // [11]
'P', // [12]
'Q', // [13]
'R', // [14]
'S', // [15]
'T', // [16]
'V', // [17]
'W', // [18]
'Y', // [19]
'*', // [20]
INVALID_CHAR, // [21]
INVALID_CHAR, // [22]
INVALID_CHAR, // [23]
INVALID_CHAR, // [24]
INVALID_CHAR, // [25]
INVALID_CHAR, // [26]
INVALID_CHAR, // [27]
INVALID_CHAR, // [28]
INVALID_CHAR, // [29]
INVALID_CHAR, // [30]
INVALID_CHAR, // [31]
INVALID_CHAR, // [32]
INVALID_CHAR, // [33]
INVALID_CHAR, // [34]
INVALID_CHAR, // [35]
INVALID_CHAR, // [36]
INVALID_CHAR, // [37]
INVALID_CHAR, // [38]
INVALID_CHAR, // [39]
INVALID_CHAR, // [40]
INVALID_CHAR, // [41]
INVALID_CHAR, // [42]
INVALID_CHAR, // [43]
INVALID_CHAR, // [44]
INVALID_CHAR, // [45]
INVALID_CHAR, // [46]
INVALID_CHAR, // [47]
INVALID_CHAR, // [48]
INVALID_CHAR, // [49]
INVALID_CHAR, // [50]
INVALID_CHAR, // [51]
INVALID_CHAR, // [52]
INVALID_CHAR, // [53]
INVALID_CHAR, // [54]
INVALID_CHAR, // [55]
INVALID_CHAR, // [56]
INVALID_CHAR, // [57]
INVALID_CHAR, // [58]
INVALID_CHAR, // [59]
INVALID_CHAR, // [60]
INVALID_CHAR, // [61]
INVALID_CHAR, // [62]
INVALID_CHAR, // [63]
INVALID_CHAR, // [64]
INVALID_CHAR, // [65]
INVALID_CHAR, // [66]
INVALID_CHAR, // [67]
INVALID_CHAR, // [68]
INVALID_CHAR, // [69]
INVALID_CHAR, // [70]
INVALID_CHAR, // [71]
INVALID_CHAR, // [72]
INVALID_CHAR, // [73]
INVALID_CHAR, // [74]
INVALID_CHAR, // [75]
INVALID_CHAR, // [76]
INVALID_CHAR, // [77]
INVALID_CHAR, // [78]
INVALID_CHAR, // [79]
INVALID_CHAR, // [80]
INVALID_CHAR, // [81]
INVALID_CHAR, // [82]
INVALID_CHAR, // [83]
INVALID_CHAR, // [84]
INVALID_CHAR, // [85]
INVALID_CHAR, // [86]
INVALID_CHAR, // [87]
INVALID_CHAR, // [88]
INVALID_CHAR, // [89]
INVALID_CHAR, // [90]
INVALID_CHAR, // [91]
INVALID_CHAR, // [92]
INVALID_CHAR, // [93]
INVALID_CHAR, // [94]
INVALID_CHAR, // [95]
INVALID_CHAR, // [96]
INVALID_CHAR, // [97]
INVALID_CHAR, // [98]
INVALID_CHAR, // [99]
INVALID_CHAR, // [100]
INVALID_CHAR, // [101]
INVALID_CHAR, // [102]
INVALID_CHAR, // [103]
INVALID_CHAR, // [104]
INVALID_CHAR, // [105]
INVALID_CHAR, // [106]
INVALID_CHAR, // [107]
INVALID_CHAR, // [108]
INVALID_CHAR, // [109]
INVALID_CHAR, // [110]
INVALID_CHAR, // [111]
INVALID_CHAR, // [112]
INVALID_CHAR, // [113]
INVALID_CHAR, // [114]
INVALID_CHAR, // [115]
INVALID_CHAR, // [116]
INVALID_CHAR, // [117]
INVALID_CHAR, // [118]
INVALID_CHAR, // [119]
INVALID_CHAR, // [120]
INVALID_CHAR, // [121]
INVALID_CHAR, // [122]
INVALID_CHAR, // [123]
INVALID_CHAR, // [124]
INVALID_CHAR, // [125]
INVALID_CHAR, // [126]
INVALID_CHAR, // [127]
INVALID_CHAR, // [128]
INVALID_CHAR, // [129]
INVALID_CHAR, // [130]
INVALID_CHAR, // [131]
INVALID_CHAR, // [132]
INVALID_CHAR, // [133]
INVALID_CHAR, // [134]
INVALID_CHAR, // [135]
INVALID_CHAR, // [136]
INVALID_CHAR, // [137]
INVALID_CHAR, // [138]
INVALID_CHAR, // [139]
INVALID_CHAR, // [140]
INVALID_CHAR, // [141]
INVALID_CHAR, // [142]
INVALID_CHAR, // [143]
INVALID_CHAR, // [144]
INVALID_CHAR, // [145]
INVALID_CHAR, // [146]
INVALID_CHAR, // [147]
INVALID_CHAR, // [148]
INVALID_CHAR, // [149]
INVALID_CHAR, // [150]
INVALID_CHAR, // [151]
INVALID_CHAR, // [152]
INVALID_CHAR, // [153]
INVALID_CHAR, // [154]
INVALID_CHAR, // [155]
INVALID_CHAR, // [156]
INVALID_CHAR, // [157]
INVALID_CHAR, // [158]
INVALID_CHAR, // [159]
INVALID_CHAR, // [160]
INVALID_CHAR, // [161]
INVALID_CHAR, // [162]
INVALID_CHAR, // [163]
INVALID_CHAR, // [164]
INVALID_CHAR, // [165]
INVALID_CHAR, // [166]
INVALID_CHAR, // [167]
INVALID_CHAR, // [168]
INVALID_CHAR, // [169]
INVALID_CHAR, // [170]
INVALID_CHAR, // [171]
INVALID_CHAR, // [172]
INVALID_CHAR, // [173]
INVALID_CHAR, // [174]
INVALID_CHAR, // [175]
INVALID_CHAR, // [176]
INVALID_CHAR, // [177]
INVALID_CHAR, // [178]
INVALID_CHAR, // [179]
INVALID_CHAR, // [180]
INVALID_CHAR, // [181]
INVALID_CHAR, // [182]
INVALID_CHAR, // [183]
INVALID_CHAR, // [184]
INVALID_CHAR, // [185]
INVALID_CHAR, // [186]
INVALID_CHAR, // [187]
INVALID_CHAR, // [188]
INVALID_CHAR, // [189]
INVALID_CHAR, // [190]
INVALID_CHAR, // [191]
INVALID_CHAR, // [192]
INVALID_CHAR, // [193]
INVALID_CHAR, // [194]
INVALID_CHAR, // [195]
INVALID_CHAR, // [196]
INVALID_CHAR, // [197]
INVALID_CHAR, // [198]
INVALID_CHAR, // [199]
INVALID_CHAR, // [200]
INVALID_CHAR, // [201]
INVALID_CHAR, // [202]
INVALID_CHAR, // [203]
INVALID_CHAR, // [204]
INVALID_CHAR, // [205]
INVALID_CHAR, // [206]
INVALID_CHAR, // [207]
INVALID_CHAR, // [208]
INVALID_CHAR, // [209]
INVALID_CHAR, // [210]
INVALID_CHAR, // [211]
INVALID_CHAR, // [212]
INVALID_CHAR, // [213]
INVALID_CHAR, // [214]
INVALID_CHAR, // [215]
INVALID_CHAR, // [216]
INVALID_CHAR, // [217]
INVALID_CHAR, // [218]
INVALID_CHAR, // [219]
INVALID_CHAR, // [220]
INVALID_CHAR, // [221]
INVALID_CHAR, // [222]
INVALID_CHAR, // [223]
INVALID_CHAR, // [224]
INVALID_CHAR, // [225]
INVALID_CHAR, // [226]
INVALID_CHAR, // [227]
INVALID_CHAR, // [228]
INVALID_CHAR, // [229]
INVALID_CHAR, // [230]
INVALID_CHAR, // [231]
INVALID_CHAR, // [232]
INVALID_CHAR, // [233]
INVALID_CHAR, // [234]
INVALID_CHAR, // [235]
INVALID_CHAR, // [236]
INVALID_CHAR, // [237]
INVALID_CHAR, // [238]
INVALID_CHAR, // [239]
INVALID_CHAR, // [240]
INVALID_CHAR, // [241]
INVALID_CHAR, // [242]
INVALID_CHAR, // [243]
INVALID_CHAR, // [244]
INVALID_CHAR, // [245]
INVALID_CHAR, // [246]
INVALID_CHAR, // [247]
INVALID_CHAR, // [248]
INVALID_CHAR, // [249]
INVALID_CHAR, // [250]
INVALID_CHAR, // [251]
INVALID_CHAR, // [252]
INVALID_CHAR, // [253]
INVALID_CHAR, // [254]
INVALID_CHAR, // [255]
};
byte g_LetterToCharAminoGap[256] =
{
'A', // [0]
'C', // [1]
'D', // [2]
'E', // [3]
'F', // [4]
'G', // [5]
'H', // [6]
'I', // [7]
'K', // [8]
'L', // [9]
'M', // [10]
'N', // [11]
'P', // [12]
'Q', // [13]
'R', // [14]
'S', // [15]
'T', // [16]
'V', // [17]
'W', // [18]
'Y', // [19]
INVALID_CHAR, // [20]
'-', // [20]
INVALID_CHAR, // [22]
INVALID_CHAR, // [23]
INVALID_CHAR, // [24]
INVALID_CHAR, // [25]
INVALID_CHAR, // [26]
INVALID_CHAR, // [27]
INVALID_CHAR, // [28]
INVALID_CHAR, // [29]
INVALID_CHAR, // [30]
INVALID_CHAR, // [31]
INVALID_CHAR, // [32]
INVALID_CHAR, // [33]
INVALID_CHAR, // [34]
INVALID_CHAR, // [35]
INVALID_CHAR, // [36]
INVALID_CHAR, // [37]
INVALID_CHAR, // [38]
INVALID_CHAR, // [39]
INVALID_CHAR, // [40]
INVALID_CHAR, // [41]
INVALID_CHAR, // [42]
INVALID_CHAR, // [43]
INVALID_CHAR, // [44]
INVALID_CHAR, // [45]
INVALID_CHAR, // [46]
INVALID_CHAR, // [47]
INVALID_CHAR, // [48]
INVALID_CHAR, // [49]
INVALID_CHAR, // [50]
INVALID_CHAR, // [51]
INVALID_CHAR, // [52]
INVALID_CHAR, // [53]
INVALID_CHAR, // [54]
INVALID_CHAR, // [55]
INVALID_CHAR, // [56]
INVALID_CHAR, // [57]
INVALID_CHAR, // [58]
INVALID_CHAR, // [59]
INVALID_CHAR, // [60]
INVALID_CHAR, // [61]
INVALID_CHAR, // [62]
INVALID_CHAR, // [63]
INVALID_CHAR, // [64]
INVALID_CHAR, // [65]
INVALID_CHAR, // [66]
INVALID_CHAR, // [67]
INVALID_CHAR, // [68]
INVALID_CHAR, // [69]
INVALID_CHAR, // [70]
INVALID_CHAR, // [71]
INVALID_CHAR, // [72]
INVALID_CHAR, // [73]
INVALID_CHAR, // [74]
INVALID_CHAR, // [75]
INVALID_CHAR, // [76]
INVALID_CHAR, // [77]
INVALID_CHAR, // [78]
INVALID_CHAR, // [79]
INVALID_CHAR, // [80]
INVALID_CHAR, // [81]
INVALID_CHAR, // [82]
INVALID_CHAR, // [83]
INVALID_CHAR, // [84]
INVALID_CHAR, // [85]
INVALID_CHAR, // [86]
INVALID_CHAR, // [87]
INVALID_CHAR, // [88]
INVALID_CHAR, // [89]
INVALID_CHAR, // [90]
INVALID_CHAR, // [91]
INVALID_CHAR, // [92]
INVALID_CHAR, // [93]
INVALID_CHAR, // [94]
INVALID_CHAR, // [95]
INVALID_CHAR, // [96]
INVALID_CHAR, // [97]
INVALID_CHAR, // [98]
INVALID_CHAR, // [99]
INVALID_CHAR, // [100]
INVALID_CHAR, // [101]
INVALID_CHAR, // [102]
INVALID_CHAR, // [103]
INVALID_CHAR, // [104]
INVALID_CHAR, // [105]
INVALID_CHAR, // [106]
INVALID_CHAR, // [107]
INVALID_CHAR, // [108]
INVALID_CHAR, // [109]
INVALID_CHAR, // [110]
INVALID_CHAR, // [111]
INVALID_CHAR, // [112]
INVALID_CHAR, // [113]
INVALID_CHAR, // [114]
INVALID_CHAR, // [115]
INVALID_CHAR, // [116]
INVALID_CHAR, // [117]
INVALID_CHAR, // [118]
INVALID_CHAR, // [119]
INVALID_CHAR, // [120]
INVALID_CHAR, // [121]
INVALID_CHAR, // [122]
INVALID_CHAR, // [123]
INVALID_CHAR, // [124]
INVALID_CHAR, // [125]
INVALID_CHAR, // [126]
INVALID_CHAR, // [127]
INVALID_CHAR, // [128]
INVALID_CHAR, // [129]
INVALID_CHAR, // [130]
INVALID_CHAR, // [131]
INVALID_CHAR, // [132]
INVALID_CHAR, // [133]
INVALID_CHAR, // [134]
INVALID_CHAR, // [135]
INVALID_CHAR, // [136]
INVALID_CHAR, // [137]
INVALID_CHAR, // [138]
INVALID_CHAR, // [139]
INVALID_CHAR, // [140]
INVALID_CHAR, // [141]
INVALID_CHAR, // [142]
INVALID_CHAR, // [143]
INVALID_CHAR, // [144]
INVALID_CHAR, // [145]
INVALID_CHAR, // [146]
INVALID_CHAR, // [147]
INVALID_CHAR, // [148]
INVALID_CHAR, // [149]
INVALID_CHAR, // [150]
INVALID_CHAR, // [151]
INVALID_CHAR, // [152]
INVALID_CHAR, // [153]
INVALID_CHAR, // [154]
INVALID_CHAR, // [155]
INVALID_CHAR, // [156]
INVALID_CHAR, // [157]
INVALID_CHAR, // [158]
INVALID_CHAR, // [159]
INVALID_CHAR, // [160]
INVALID_CHAR, // [161]
INVALID_CHAR, // [162]
INVALID_CHAR, // [163]
INVALID_CHAR, // [164]
INVALID_CHAR, // [165]
INVALID_CHAR, // [166]
INVALID_CHAR, // [167]
INVALID_CHAR, // [168]
INVALID_CHAR, // [169]
INVALID_CHAR, // [170]
INVALID_CHAR, // [171]
INVALID_CHAR, // [172]
INVALID_CHAR, // [173]
INVALID_CHAR, // [174]
INVALID_CHAR, // [175]
INVALID_CHAR, // [176]
INVALID_CHAR, // [177]
INVALID_CHAR, // [178]
INVALID_CHAR, // [179]
INVALID_CHAR, // [180]
INVALID_CHAR, // [181]
INVALID_CHAR, // [182]
INVALID_CHAR, // [183]
INVALID_CHAR, // [184]
INVALID_CHAR, // [185]
INVALID_CHAR, // [186]
INVALID_CHAR, // [187]
INVALID_CHAR, // [188]
INVALID_CHAR, // [189]
INVALID_CHAR, // [190]
INVALID_CHAR, // [191]
INVALID_CHAR, // [192]
INVALID_CHAR, // [193]
INVALID_CHAR, // [194]
INVALID_CHAR, // [195]
INVALID_CHAR, // [196]
INVALID_CHAR, // [197]
INVALID_CHAR, // [198]
INVALID_CHAR, // [199]
INVALID_CHAR, // [200]
INVALID_CHAR, // [201]
INVALID_CHAR, // [202]
INVALID_CHAR, // [203]
INVALID_CHAR, // [204]
INVALID_CHAR, // [205]
INVALID_CHAR, // [206]
INVALID_CHAR, // [207]
INVALID_CHAR, // [208]
INVALID_CHAR, // [209]
INVALID_CHAR, // [210]
INVALID_CHAR, // [211]
INVALID_CHAR, // [212]
INVALID_CHAR, // [213]
INVALID_CHAR, // [214]
INVALID_CHAR, // [215]
INVALID_CHAR, // [216]
INVALID_CHAR, // [217]
INVALID_CHAR, // [218]
INVALID_CHAR, // [219]
INVALID_CHAR, // [220]
INVALID_CHAR, // [221]
INVALID_CHAR, // [222]
INVALID_CHAR, // [223]
INVALID_CHAR, // [224]
INVALID_CHAR, // [225]
INVALID_CHAR, // [226]
INVALID_CHAR, // [227]
INVALID_CHAR, // [228]
INVALID_CHAR, // [229]
INVALID_CHAR, // [230]
INVALID_CHAR, // [231]
INVALID_CHAR, // [232]
INVALID_CHAR, // [233]
INVALID_CHAR, // [234]
INVALID_CHAR, // [235]
INVALID_CHAR, // [236]
INVALID_CHAR, // [237]
INVALID_CHAR, // [238]
INVALID_CHAR, // [239]
INVALID_CHAR, // [240]
INVALID_CHAR, // [241]
INVALID_CHAR, // [242]
INVALID_CHAR, // [243]
INVALID_CHAR, // [244]
INVALID_CHAR, // [245]
INVALID_CHAR, // [246]
INVALID_CHAR, // [247]
INVALID_CHAR, // [248]
INVALID_CHAR, // [249]
INVALID_CHAR, // [250]
INVALID_CHAR, // [251]
INVALID_CHAR, // [252]
INVALID_CHAR, // [253]
INVALID_CHAR, // [254]
INVALID_CHAR, // [255]
};
// Character code -> nucleotide letter code, indexed by the 8-bit character value.
//   'A'/'a' -> 0, 'C'/'c' -> 1, 'G'/'g' -> 2, 'T'/'t' and 'U'/'u' -> 3.
// Every other character code maps to INVALID_LETTER.
// Laid out eight entries per row; the trailing comment gives the index range.
byte g_CharToLetterNucleo[256] = {
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x00-0x07
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x08-0x0F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x10-0x17
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x18-0x1F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x20-0x27
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x28-0x2F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x30-0x37
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x38-0x3F
	INVALID_LETTER, 0,              INVALID_LETTER, 1,              INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, 2,              // 0x40-0x47: A=0, C=1, G=2
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x48-0x4F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, 3,              3,              INVALID_LETTER, INVALID_LETTER, // 0x50-0x57: T=3, U=3
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x58-0x5F
	INVALID_LETTER, 0,              INVALID_LETTER, 1,              INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, 2,              // 0x60-0x67: a=0, c=1, g=2
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x68-0x6F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, 3,              3,              INVALID_LETTER, INVALID_LETTER, // 0x70-0x77: t=3, u=3
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x78-0x7F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x80-0x87
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x88-0x8F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x90-0x97
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x98-0x9F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xA0-0xA7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xA8-0xAF
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xB0-0xB7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xB8-0xBF
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xC0-0xC7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xC8-0xCF
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xD0-0xD7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xD8-0xDF
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xE0-0xE7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xE8-0xEF
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xF0-0xF7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xF8-0xFF
};
// Character code -> nucleotide letter code, with the gap character included.
//   'A'/'a' -> 0, 'C'/'c' -> 1, 'G'/'g' -> 2, 'T'/'t' and 'U'/'u' -> 3,
//   '-' (gap, code 45) -> 4.
// Every other character code maps to INVALID_LETTER.
// Laid out eight entries per row; the trailing comment gives the index range.
byte g_CharToLetterNucleoGap[256] = {
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x00-0x07
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x08-0x0F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x10-0x17
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x18-0x1F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x20-0x27
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, 4,              INVALID_LETTER, INVALID_LETTER, // 0x28-0x2F: '-' (gap) = 4
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x30-0x37
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x38-0x3F
	INVALID_LETTER, 0,              INVALID_LETTER, 1,              INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, 2,              // 0x40-0x47: A=0, C=1, G=2
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x48-0x4F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, 3,              3,              INVALID_LETTER, INVALID_LETTER, // 0x50-0x57: T=3, U=3
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x58-0x5F
	INVALID_LETTER, 0,              INVALID_LETTER, 1,              INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, 2,              // 0x60-0x67: a=0, c=1, g=2
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x68-0x6F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, 3,              3,              INVALID_LETTER, INVALID_LETTER, // 0x70-0x77: t=3, u=3
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x78-0x7F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x80-0x87
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x88-0x8F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x90-0x97
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x98-0x9F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xA0-0xA7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xA8-0xAF
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xB0-0xB7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xB8-0xBF
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xC0-0xC7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xC8-0xCF
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xD0-0xD7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xD8-0xDF
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xE0-0xE7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xE8-0xEF
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xF0-0xF7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xF8-0xFF
};
// Character code -> nucleotide letter code, upper-case letters only.
//   'A' -> 0, 'C' -> 1, 'G' -> 2, 'T' and 'U' -> 3.
// Lower-case a/c/g/t/u are NOT accepted here: they map to INVALID_LETTER like
// every other character code (presumably because lower case marks masked
// sequence -- confirm against the callers).
// Laid out eight entries per row; the trailing comment gives the index range.
byte g_CharToLetterNucleoMasked[256] = {
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x00-0x07
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x08-0x0F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x10-0x17
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x18-0x1F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x20-0x27
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x28-0x2F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x30-0x37
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x38-0x3F
	INVALID_LETTER, 0,              INVALID_LETTER, 1,              INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, 2,              // 0x40-0x47: A=0, C=1, G=2
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x48-0x4F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, 3,              3,              INVALID_LETTER, INVALID_LETTER, // 0x50-0x57: T=3, U=3
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x58-0x5F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x60-0x67: lower case is invalid
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x68-0x6F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x70-0x77
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x78-0x7F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x80-0x87
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x88-0x8F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x90-0x97
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0x98-0x9F
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xA0-0xA7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xA8-0xAF
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xB0-0xB7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xB8-0xBF
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xC0-0xC7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xC8-0xCF
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xD0-0xD7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xD8-0xDF
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xE0-0xE7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xE8-0xEF
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xF0-0xF7
	INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // 0xF8-0xFF
};
// Character code -> nucleotide letter code that never yields INVALID_LETTER:
// every entry is one of 0..3.
//   Nucleotides: 'A' -> 0, 'C' -> 1, 'G' -> 2, 'T' and 'U' -> 3.
//   Wildcards:   'B' -> 1, 'D' -> 0, 'H' -> 0, 'K' -> 2, 'M' -> 0, 'N' -> 2,
//                'R' -> 0, 'S' -> 1, 'V' -> 0, 'W' -> 0, 'X' -> 2, 'Y' -> 1.
// Lower-case letters carry the same value as their upper-case counterpart.
// All remaining character codes (controls, digits, punctuation, the other
// letters, and 0x80-0xFF) map to 0.
// Laid out sixteen entries per row; the trailing comment gives the index range.
byte g_CharToLetterNucleoW[256] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00-0x0F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10-0x1F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20-0x2F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30-0x3F
	0, 0, 1, 1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2, 0, // 0x40-0x4F: '@', 'A'-'O'
	0, 0, 0, 1, 3, 3, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, // 0x50-0x5F: 'P'-'Z', then five non-letters
	0, 0, 1, 1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2, 0, // 0x60-0x6F: '`', 'a'-'o'
	0, 0, 0, 1, 3, 3, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, // 0x70-0x7F: 'p'-'z', then five non-letters
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80-0x8F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90-0x9F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0-0xAF
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0-0xBF
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0-0xCF
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0-0xDF
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0-0xEF
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0-0xFF
};
// Nucleotide letter code -> character.
// Only the four codes 0..3 are assigned (A, C, G, T); every other index
// yields INVALID_CHAR. Entries are laid out eight per row, with the index
// range covered by each row given in the trailing comment.
byte g_LetterToCharNucleo[256] =
{
    'A', 'C', 'G', 'T', INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [  0..  7]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [  8.. 15]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 16.. 23]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 24.. 31]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 32.. 39]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 40.. 47]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 48.. 55]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 56.. 63]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 64.. 71]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 72.. 79]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 80.. 87]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 88.. 95]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 96..103]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [104..111]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [112..119]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [120..127]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [128..135]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [136..143]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [144..151]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [152..159]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [160..167]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [168..175]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [176..183]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [184..191]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [192..199]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [200..207]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [208..215]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [216..223]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [224..231]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [232..239]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [240..247]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [248..255]
};
// Nucleotide letter code -> character, with a gap symbol.
// Codes 0..3 give A, C, G, T and code 4 gives '-'; every other index
// yields INVALID_CHAR. Entries are laid out eight per row, with the index
// range covered by each row given in the trailing comment.
byte g_LetterToCharNucleoGap[256] =
{
    'A', 'C', 'G', 'T', '-', INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [  0..  7]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [  8.. 15]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 16.. 23]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 24.. 31]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 32.. 39]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 40.. 47]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 48.. 55]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 56.. 63]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 64.. 71]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 72.. 79]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 80.. 87]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 88.. 95]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [ 96..103]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [104..111]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [112..119]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [120..127]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [128..135]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [136..143]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [144..151]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [152..159]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [160..167]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [168..175]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [176..183]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [184..191]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [192..199]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [200..207]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [208..215]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [216..223]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [224..231]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [232..239]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [240..247]
    INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, INVALID_CHAR, // [248..255]
};
// Codon word -> amino-acid letter code.
// The index is the three-base codon read as a base-4 number with
// A=0, C=1, G=2, T=3 (index = 16*first + 4*second + third). Each row below
// holds the four codons sharing the same first two bases, third base in
// the order A, C, G, T. The value 20 marks a stop codon.
byte g_CodonWordToAminoLetter[4*4*4] =
{
    //  xxA  xxC  xxG  xxT
         8,  11,   8,  11,  // [ 0.. 3] AAx  K N K N  (Lys Asn Lys Asn)
        16,  16,  16,  16,  // [ 4.. 7] ACx  T T T T  (Thr)
        14,  15,  14,  15,  // [ 8..11] AGx  R S R S  (Arg Ser Arg Ser)
         7,   7,  10,   7,  // [12..15] ATx  I I M I  (Ile Ile Met Ile)
        13,   6,  13,   6,  // [16..19] CAx  Q H Q H  (Gln His Gln His)
        12,  12,  12,  12,  // [20..23] CCx  P P P P  (Pro)
        14,  14,  14,  14,  // [24..27] CGx  R R R R  (Arg)
         9,   9,   9,   9,  // [28..31] CTx  L L L L  (Leu)
         3,   2,   3,   2,  // [32..35] GAx  E D E D  (Glu Asp Glu Asp)
         0,   0,   0,   0,  // [36..39] GCx  A A A A  (Ala)
         5,   5,   5,   5,  // [40..43] GGx  G G G G  (Gly)
        17,  17,  17,  17,  // [44..47] GTx  V V V V  (Val)
        20,  19,  20,  19,  // [48..51] TAx  * Y * Y  (STP Tyr STP Tyr)
        15,  15,  15,  15,  // [52..55] TCx  S S S S  (Ser)
        20,   1,  18,   1,  // [56..59] TGx  * C W C  (STP Cys Trp Cys)
         9,   4,   9,   4,  // [60..63] TTx  L F L F  (Leu Phe Leu Phe)
};
// Codon word -> one-letter amino-acid character.
// The index is the three-base codon read as a base-4 number with
// A=0, C=1, G=2, T=3 (index = 16*first + 4*second + third). Each row below
// holds the four codons sharing the same first two bases, third base in
// the order A, C, G, T. Stop codons are written as '*'.
byte g_CodonWordToAminoChar[4*4*4] =
{
    //  xxA  xxC  xxG  xxT
        'K', 'N', 'K', 'N',  // [ 0.. 3] AAx  Lys Asn Lys Asn
        'T', 'T', 'T', 'T',  // [ 4.. 7] ACx  Thr
        'R', 'S', 'R', 'S',  // [ 8..11] AGx  Arg Ser Arg Ser
        'I', 'I', 'M', 'I',  // [12..15] ATx  Ile Ile Met Ile
        'Q', 'H', 'Q', 'H',  // [16..19] CAx  Gln His Gln His
        'P', 'P', 'P', 'P',  // [20..23] CCx  Pro
        'R', 'R', 'R', 'R',  // [24..27] CGx  Arg
        'L', 'L', 'L', 'L',  // [28..31] CTx  Leu
        'E', 'D', 'E', 'D',  // [32..35] GAx  Glu Asp Glu Asp
        'A', 'A', 'A', 'A',  // [36..39] GCx  Ala
        'G', 'G', 'G', 'G',  // [40..43] GGx  Gly
        'V', 'V', 'V', 'V',  // [44..47] GTx  Val
        '*', 'Y', '*', 'Y',  // [48..51] TAx  STP Tyr STP Tyr
        'S', 'S', 'S', 'S',  // [52..55] TCx  Ser
        '*', 'C', 'W', 'C',  // [56..59] TGx  STP Cys Trp Cys
        'L', 'F', 'L', 'F',  // [60..63] TTx  Leu Phe Leu Phe
};
// Nucleotide character -> complement character, indexed by character code.
// Letter case is preserved (upper-case in, upper-case out; lower-case in,
// lower-case out). Characters that are not nucleotide codes map to '?'.
// Both 'U' and 'u' are complemented like T/t, i.e. to 'A'/'a'; the
// lower-case 'u' entry previously returned '?', which disagreed with the
// upper-case 'U' entry.
byte g_CharToCompChar[256] =
{
    '?', '?', '?', '?', '?', '?', '?', '?', // [  0..  7] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [  8.. 15] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [ 16.. 23] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [ 24.. 31] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [ 32.. 39] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [ 40.. 47] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [ 48.. 55] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [ 56.. 63] invalid nt
    '?', // [ 64] 0x40 invalid nt
    'T', // [ 65] A -> T
    'V', // [ 66] B -> V
    'G', // [ 67] C -> G
    'H', // [ 68] D -> H
    '?', // [ 69] E invalid nt
    '?', // [ 70] F invalid nt
    'C', // [ 71] G -> C
    'D', // [ 72] H -> D
    '?', // [ 73] I invalid nt
    '?', // [ 74] J invalid nt
    'M', // [ 75] K -> M
    '?', // [ 76] L invalid nt
    'K', // [ 77] M -> K
    'N', // [ 78] N -> N
    '?', // [ 79] O invalid nt
    '?', // [ 80] P invalid nt
    '?', // [ 81] Q invalid nt
    'Y', // [ 82] R -> Y
    'S', // [ 83] S -> S
    'A', // [ 84] T -> A
    'A', // [ 85] U -> A
    'B', // [ 86] V -> B
    'W', // [ 87] W -> W
    'X', // [ 88] X -> X
    'R', // [ 89] Y -> R
    '?', // [ 90] Z invalid nt
    '?', '?', '?', '?', '?', '?', // [ 91.. 96] invalid nt
    't', // [ 97] a -> t
    'v', // [ 98] b -> v
    'g', // [ 99] c -> g
    'h', // [100] d -> h
    '?', // [101] e invalid nt
    '?', // [102] f invalid nt
    'c', // [103] g -> c
    'd', // [104] h -> d
    '?', // [105] i invalid nt
    '?', // [106] j invalid nt
    'm', // [107] k -> m
    '?', // [108] l invalid nt
    'k', // [109] m -> k
    'n', // [110] n -> n
    '?', // [111] o invalid nt
    '?', // [112] p invalid nt
    '?', // [113] q invalid nt
    'y', // [114] r -> y
    's', // [115] s -> s
    'a', // [116] t -> a
    'a', // [117] u -> a (same as 'U' -> 'A', case preserved)
    'b', // [118] v -> b
    'w', // [119] w -> w
    'x', // [120] x -> x
    'r', // [121] y -> r
    '?', // [122] z invalid nt
    '?', '?', '?', '?', '?', // [123..127] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [128..135] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [136..143] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [144..151] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [152..159] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [160..167] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [168..175] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [176..183] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [184..191] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [192..199] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [200..207] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [208..215] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [216..223] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [224..231] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [232..239] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [240..247] invalid nt
    '?', '?', '?', '?', '?', '?', '?', '?', // [248..255] invalid nt
};
// Nucleotide letter code -> letter code of its complement.
// 0(A) <-> 3(T) and 1(C) <-> 2(G); every index above 3 yields
// INVALID_LETTER. Entries are laid out eight per row, with the index range
// covered by each row given in the trailing comment.
byte g_LetterToCompLetter[256] =
{
    3, 2, 1, 0, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [  0..  7] A->T, C->G, G->C, T->A
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [  8.. 15]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 16.. 23]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 24.. 31]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 32.. 39]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 40.. 47]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 48.. 55]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 56.. 63]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 64.. 71]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 72.. 79]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 80.. 87]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 88.. 95]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 96..103]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [104..111]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [112..119]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [120..127]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [128..135]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [136..143]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [144..151]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [152..159]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [160..167]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [168..175]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [176..183]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [184..191]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [192..199]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [200..207]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [208..215]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [216..223]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [224..231]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [232..239]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [240..247]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [248..255]
};
// Nucleotide character -> letter code of its complement, indexed by
// character code. Letter codes are 0=A, 1=C, 2=G, 3=T, so only characters
// whose complement is one of A, C, G, T have an entry: A/a -> 3,
// C/c -> 2, G/g -> 1, T/t -> 0 and U/u -> 0. Every other character,
// including the multi-base codes whose complement has no letter code,
// yields INVALID_LETTER.
// The lower-case 'u' entry previously returned INVALID_LETTER, which
// disagreed with the upper-case 'U' entry.
byte g_CharToCompLetter[256] =
{
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [  0..  7]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [  8.. 15]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 16.. 23]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 24.. 31]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 32.. 39]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 40.. 47]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 48.. 55]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 56.. 63]
    INVALID_LETTER, // [ 64]
    3, // [ 65] A -> T
    INVALID_LETTER, // [ 66] B: complement V has no letter code
    2, // [ 67] C -> G
    INVALID_LETTER, // [ 68] D: complement H has no letter code
    INVALID_LETTER, // [ 69]
    INVALID_LETTER, // [ 70]
    1, // [ 71] G -> C
    INVALID_LETTER, // [ 72] H: complement D has no letter code
    INVALID_LETTER, // [ 73]
    INVALID_LETTER, // [ 74]
    INVALID_LETTER, // [ 75] K: complement M has no letter code
    INVALID_LETTER, // [ 76]
    INVALID_LETTER, // [ 77] M: complement K has no letter code
    INVALID_LETTER, // [ 78] N: complement N has no letter code
    INVALID_LETTER, // [ 79]
    INVALID_LETTER, // [ 80]
    INVALID_LETTER, // [ 81]
    INVALID_LETTER, // [ 82] R: complement Y has no letter code
    INVALID_LETTER, // [ 83] S: complement S has no letter code
    0, // [ 84] T -> A
    0, // [ 85] U -> A
    INVALID_LETTER, // [ 86] V: complement B has no letter code
    INVALID_LETTER, // [ 87] W: complement W has no letter code
    INVALID_LETTER, // [ 88] X: complement X has no letter code
    INVALID_LETTER, // [ 89] Y: complement R has no letter code
    INVALID_LETTER, // [ 90]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [ 91.. 96]
    3, // [ 97] a -> t
    INVALID_LETTER, // [ 98] b: complement v has no letter code
    2, // [ 99] c -> g
    INVALID_LETTER, // [100] d: complement h has no letter code
    INVALID_LETTER, // [101]
    INVALID_LETTER, // [102]
    1, // [103] g -> c
    INVALID_LETTER, // [104] h: complement d has no letter code
    INVALID_LETTER, // [105]
    INVALID_LETTER, // [106]
    INVALID_LETTER, // [107] k: complement m has no letter code
    INVALID_LETTER, // [108]
    INVALID_LETTER, // [109] m: complement k has no letter code
    INVALID_LETTER, // [110] n: complement n has no letter code
    INVALID_LETTER, // [111]
    INVALID_LETTER, // [112]
    INVALID_LETTER, // [113]
    INVALID_LETTER, // [114] r: complement y has no letter code
    INVALID_LETTER, // [115] s: complement s has no letter code
    0, // [116] t -> a
    0, // [117] u -> a (same as 'U' -> A)
    INVALID_LETTER, // [118] v: complement b has no letter code
    INVALID_LETTER, // [119] w: complement w has no letter code
    INVALID_LETTER, // [120] x: complement x has no letter code
    INVALID_LETTER, // [121] y: complement r has no letter code
    INVALID_LETTER, // [122]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [123..127]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [128..135]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [136..143]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [144..151]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [152..159]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [160..167]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [168..175]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [176..183]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [184..191]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [192..199]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [200..207]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [208..215]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [216..223]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [224..231]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [232..239]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [240..247]
    INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, INVALID_LETTER, // [248..255]
};
// g_IsSeqChar[c] is true when byte value c is one of:
//   '*', or an ASCII letter (either case) other than J/j and O/o.
// Every other byte value, including all values >= 0x80, maps to false.
// The table is laid out eight entries per row; the row comment gives the
// byte value of the first entry and the characters covered by that row.
bool g_IsSeqChar[256] =
{
	false, false, false, false, false, false, false, false, // 0x00: control characters
	false, false, false, false, false, false, false, false, // 0x08: control characters
	false, false, false, false, false, false, false, false, // 0x10: control characters
	false, false, false, false, false, false, false, false, // 0x18: control characters
	false, false, false, false, false, false, false, false, // 0x20: space ! " # $ % & '
	false, false, true,  false, false, false, false, false, // 0x28: ( ) * + , - . /
	false, false, false, false, false, false, false, false, // 0x30: 0 1 2 3 4 5 6 7
	false, false, false, false, false, false, false, false, // 0x38: 8 9 : ; < = > ?
	false, true,  true,  true,  true,  true,  true,  true,  // 0x40: @ A B C D E F G
	true,  true,  false, true,  true,  true,  true,  false, // 0x48: H I J K L M N O
	true,  true,  true,  true,  true,  true,  true,  true,  // 0x50: P Q R S T U V W
	true,  true,  true,  false, false, false, false, false, // 0x58: X Y Z [ \ ] ^ _
	false, true,  true,  true,  true,  true,  true,  true,  // 0x60: ` a b c d e f g
	true,  true,  false, true,  true,  true,  true,  false, // 0x68: h i j k l m n o
	true,  true,  true,  true,  true,  true,  true,  true,  // 0x70: p q r s t u v w
	true,  true,  true,  false, false, false, false, false, // 0x78: x y z { | } ~ DEL
	false, false, false, false, false, false, false, false, // 0x80
	false, false, false, false, false, false, false, false, // 0x88
	false, false, false, false, false, false, false, false, // 0x90
	false, false, false, false, false, false, false, false, // 0x98
	false, false, false, false, false, false, false, false, // 0xa0
	false, false, false, false, false, false, false, false, // 0xa8
	false, false, false, false, false, false, false, false, // 0xb0
	false, false, false, false, false, false, false, false, // 0xb8
	false, false, false, false, false, false, false, false, // 0xc0
	false, false, false, false, false, false, false, false, // 0xc8
	false, false, false, false, false, false, false, false, // 0xd0
	false, false, false, false, false, false, false, false, // 0xd8
	false, false, false, false, false, false, false, false, // 0xe0
	false, false, false, false, false, false, false, false, // 0xe8
	false, false, false, false, false, false, false, false, // 0xf0
	false, false, false, false, false, false, false, false, // 0xf8
};
// g_IsAminoChar[c] is true when byte value c is '*' (stop) or one of the
// twenty one-letter amino acid codes A C D E F G H I K L M N P Q R S T V W Y,
// in upper or lower case. B, J, O, U, X, Z and every non-letter other than
// '*' map to false, as do all byte values >= 0x80.
// The table is laid out eight entries per row; the row comment gives the
// byte value of the first entry and the characters covered by that row.
bool g_IsAminoChar[256] =
{
	false, false, false, false, false, false, false, false, // 0x00: control characters
	false, false, false, false, false, false, false, false, // 0x08: control characters
	false, false, false, false, false, false, false, false, // 0x10: control characters
	false, false, false, false, false, false, false, false, // 0x18: control characters
	false, false, false, false, false, false, false, false, // 0x20: space ! " # $ % & '
	false, false, true,  false, false, false, false, false, // 0x28: ( ) * + , - . /   ('*' = stop)
	false, false, false, false, false, false, false, false, // 0x30: 0 1 2 3 4 5 6 7
	false, false, false, false, false, false, false, false, // 0x38: 8 9 : ; < = > ?
	false, true,  false, true,  true,  true,  true,  true,  // 0x40: @ A B C D E F G
	true,  true,  false, true,  true,  true,  true,  false, // 0x48: H I J K L M N O
	true,  true,  true,  true,  true,  false, true,  true,  // 0x50: P Q R S T U V W
	false, true,  false, false, false, false, false, false, // 0x58: X Y Z [ \ ] ^ _
	false, true,  false, true,  true,  true,  true,  true,  // 0x60: ` a b c d e f g
	true,  true,  false, true,  true,  true,  true,  false, // 0x68: h i j k l m n o
	true,  true,  true,  true,  true,  false, true,  true,  // 0x70: p q r s t u v w
	false, true,  false, false, false, false, false, false, // 0x78: x y z { | } ~ DEL
	false, false, false, false, false, false, false, false, // 0x80
	false, false, false, false, false, false, false, false, // 0x88
	false, false, false, false, false, false, false, false, // 0x90
	false, false, false, false, false, false, false, false, // 0x98
	false, false, false, false, false, false, false, false, // 0xa0
	false, false, false, false, false, false, false, false, // 0xa8
	false, false, false, false, false, false, false, false, // 0xb0
	false, false, false, false, false, false, false, false, // 0xb8
	false, false, false, false, false, false, false, false, // 0xc0
	false, false, false, false, false, false, false, false, // 0xc8
	false, false, false, false, false, false, false, false, // 0xd0
	false, false, false, false, false, false, false, false, // 0xd8
	false, false, false, false, false, false, false, false, // 0xe0
	false, false, false, false, false, false, false, false, // 0xe8
	false, false, false, false, false, false, false, false, // 0xf0
	false, false, false, false, false, false, false, false, // 0xf8
};
// g_IsNucleoChar[c] is true when byte value c is one of the letters
// A, C, G, T, U or N in upper or lower case; every other byte value,
// including '*' and all values >= 0x80, maps to false.
// The table is laid out eight entries per row; the row comment gives the
// byte value of the first entry and the characters covered by that row.
bool g_IsNucleoChar[256] =
{
	false, false, false, false, false, false, false, false, // 0x00: control characters
	false, false, false, false, false, false, false, false, // 0x08: control characters
	false, false, false, false, false, false, false, false, // 0x10: control characters
	false, false, false, false, false, false, false, false, // 0x18: control characters
	false, false, false, false, false, false, false, false, // 0x20: space ! " # $ % & '
	false, false, false, false, false, false, false, false, // 0x28: ( ) * + , - . /
	false, false, false, false, false, false, false, false, // 0x30: 0 1 2 3 4 5 6 7
	false, false, false, false, false, false, false, false, // 0x38: 8 9 : ; < = > ?
	false, true,  false, true,  false, false, false, true,  // 0x40: @ A B C D E F G
	false, false, false, false, false, false, true,  false, // 0x48: H I J K L M N O
	false, false, false, false, true,  true,  false, false, // 0x50: P Q R S T U V W
	false, false, false, false, false, false, false, false, // 0x58: X Y Z [ \ ] ^ _
	false, true,  false, true,  false, false, false, true,  // 0x60: ` a b c d e f g
	false, false, false, false, false, false, true,  false, // 0x68: h i j k l m n o
	false, false, false, false, true,  true,  false, false, // 0x70: p q r s t u v w
	false, false, false, false, false, false, false, false, // 0x78: x y z { | } ~ DEL
	false, false, false, false, false, false, false, false, // 0x80
	false, false, false, false, false, false, false, false, // 0x88
	false, false, false, false, false, false, false, false, // 0x90
	false, false, false, false, false, false, false, false, // 0x98
	false, false, false, false, false, false, false, false, // 0xa0
	false, false, false, false, false, false, false, false, // 0xa8
	false, false, false, false, false, false, false, false, // 0xb0
	false, false, false, false, false, false, false, false, // 0xb8
	false, false, false, false, false, false, false, false, // 0xc0
	false, false, false, false, false, false, false, false, // 0xc8
	false, false, false, false, false, false, false, false, // 0xd0
	false, false, false, false, false, false, false, false, // 0xd8
	false, false, false, false, false, false, false, false, // 0xe0
	false, false, false, false, false, false, false, false, // 0xe8
	false, false, false, false, false, false, false, false, // 0xf0
	false, false, false, false, false, false, false, false, // 0xf8
};
// g_IsACGTU[c] is true when byte value c is one of the letters
// A, C, G, T or U in upper or lower case; every other byte value
// (including N/n and all values >= 0x80) maps to false.
// The table is laid out eight entries per row; the row comment gives the
// byte value of the first entry and the characters covered by that row.
bool g_IsACGTU[256] =
{
	false, false, false, false, false, false, false, false, // 0x00: control characters
	false, false, false, false, false, false, false, false, // 0x08: control characters
	false, false, false, false, false, false, false, false, // 0x10: control characters
	false, false, false, false, false, false, false, false, // 0x18: control characters
	false, false, false, false, false, false, false, false, // 0x20: space ! " # $ % & '
	false, false, false, false, false, false, false, false, // 0x28: ( ) * + , - . /
	false, false, false, false, false, false, false, false, // 0x30: 0 1 2 3 4 5 6 7
	false, false, false, false, false, false, false, false, // 0x38: 8 9 : ; < = > ?
	false, true,  false, true,  false, false, false, true,  // 0x40: @ A B C D E F G
	false, false, false, false, false, false, false, false, // 0x48: H I J K L M N O
	false, false, false, false, true,  true,  false, false, // 0x50: P Q R S T U V W
	false, false, false, false, false, false, false, false, // 0x58: X Y Z [ \ ] ^ _
	false, true,  false, true,  false, false, false, true,  // 0x60: ` a b c d e f g
	false, false, false, false, false, false, false, false, // 0x68: h i j k l m n o
	false, false, false, false, true,  true,  false, false, // 0x70: p q r s t u v w
	false, false, false, false, false, false, false, false, // 0x78: x y z { | } ~ DEL
	false, false, false, false, false, false, false, false, // 0x80
	false, false, false, false, false, false, false, false, // 0x88
	false, false, false, false, false, false, false, false, // 0x90
	false, false, false, false, false, false, false, false, // 0x98
	false, false, false, false, false, false, false, false, // 0xa0
	false, false, false, false, false, false, false, false, // 0xa8
	false, false, false, false, false, false, false, false, // 0xb0
	false, false, false, false, false, false, false, false, // 0xb8
	false, false, false, false, false, false, false, false, // 0xc0
	false, false, false, false, false, false, false, false, // 0xc8
	false, false, false, false, false, false, false, false, // 0xd0
	false, false, false, false, false, false, false, false, // 0xd8
	false, false, false, false, false, false, false, false, // 0xe0
	false, false, false, false, false, false, false, false, // 0xe8
	false, false, false, false, false, false, false, false, // 0xf0
	false, false, false, false, false, false, false, false, // 0xf8
};
// Per-residue frequency for the twenty amino acids, stored in alphabetical
// order of the one-letter code (A C D E F G H I K L M N P Q R S T V W Y).
// The literals sum to 0.9998.
// NOTE(review): presumably background frequencies indexed by the amino
// "letter" code used by the translation tables -- confirm against callers.
float g_AminoFreqs[20] =
{
	/* A (Ala), C (Cys), D (Asp), E (Glu), F (Phe) */
	0.0777f, 0.0161f, 0.0527f, 0.0631f, 0.0417f,
	/* G (Gly), H (His), I (Ile), K (Lys), L (Leu) */
	0.0718f, 0.0238f, 0.0606f, 0.0601f, 0.0906f,
	/* M (Met), N (Asn), P (Pro), Q (Gln), R (Arg) */
	0.0233f, 0.0439f, 0.0456f, 0.0368f, 0.0526f,
	/* S (Ser), T (Thr), V (Val), W (Trp), Y (Tyr) */
	0.0639f, 0.0570f, 0.0712f, 0.0134f, 0.0339f,
};
muscle-5.1.0/src/alpha3.h 0000664 0000000 0000000 00000003417 14244530626 0015130 0 ustar 00root root 0000000 0000000 #ifndef alpha3_h
#define alpha3_h
#include
#include
using namespace std;
// Sentinel values shared by the alphabet tables declared below.
// INVALID_LETTER is the filler used for byte-valued table entries that have
// no letter assigned.
// NOTE(review): INVALID_CHAR and BAD_WORD are presumably the analogous
// "no character" / "no word" markers -- confirm against their users.
const byte INVALID_LETTER = 0xff;
const byte INVALID_CHAR = '?';
const unsigned BAD_WORD = UINT_MAX;

// Amino acid alphabet tables.
// NOTE(review): sizes and contents are defined outside this header;
// presumably the CharToLetter* tables are indexed by byte value and the
// LetterToChar* tables by letter code -- confirm at the definitions.
extern byte g_AminoAcidChars[];
extern byte g_CharToLetterAmino[];
extern byte g_CharToLetterAminoStop[];
extern byte g_CharToLetterAminoGap[];
extern byte g_LetterToCharAmino[];
extern byte g_LetterToCharAminoGap[];

// Nucleotide alphabet tables (same caveat as for the amino tables above).
extern byte g_CharToLetterNucleo[];
extern byte g_CharToLetterNucleoGap[];
extern byte g_CharToLetterNucleoMasked[];
extern byte g_LetterToCharNucleo[];
extern byte g_LetterToCharNucleoGap[];

// Codon translation tables.
// NOTE(review): presumably indexed by an encoded three-nucleotide word -- confirm.
extern byte g_CodonWordToAminoLetter[];
extern byte g_CodonWordToAminoChar[];

// Complement tables.
// NOTE(review): presumably nucleotide complements, indexed by byte value -- confirm.
extern byte g_CharToCompChar[];
extern byte g_CharToCompLetter[];

// 256-entry tables (one entry per byte value) for IUPAC pair handling and
// the SEB8 alphabet; their contents are defined outside this header.
extern byte g_IUPAC_PairCharToChar1[256];
extern byte g_IUPAC_PairCharToChar2[256];
extern byte g_IUPAC_PairCharToCharCase[256];
extern byte g_CharToLetterSEB8[256];

// Match matrices, accessed through pointer-to-pointer.
// NOTE(review): allocation site, dimensions and ownership are not visible
// here -- confirm before indexing.
extern bool **g_MatchMxNucleo;
extern bool **g_MatchMxAmino;

// Character classification tables. Each is defined as a 256-entry bool
// array indexed by byte value:
//   g_IsAminoChar  - '*' and the twenty standard amino acid letters, either case
//   g_IsNucleoChar - A, C, G, T, U, N, either case
//   g_IsACGTU      - A, C, G, T, U, either case
//   g_IsSeqChar    - '*' and every letter except J and O, either case
extern bool g_IsAminoChar[];
extern bool g_IsNucleoChar[];
extern bool g_IsACGTU[];
extern bool g_IsSeqChar[];

// Twenty per-residue frequencies in alphabetical one-letter order
// (A C D E F G H I K L M N P Q R S T V W Y).
extern float g_AminoFreqs[];

// Reduced alphabet state.
// NOTE(review): presumably populated by ReadRedAlphaFromFile() -- confirm.
extern unsigned g_CharToLetterRed[];
extern byte g_LetterToCharRed[];
extern unsigned g_RedAlphaSize;
void LogRedAlphaRed();
void ReadRedAlphaFromFile(const string &FileName);

// Maps three nucleotide characters to one amino acid character.
// NOTE(review): result for non-nucleotide input is defined by the
// implementation, which is not visible here.
byte GetAminoCharFrom3NucChars(byte c1, byte c2,
byte c3);

// Render an encoded word of WordLength letters as text.
// NOTE(review): the three functions returning const char * do not take an
// output buffer, so they presumably return a pointer to storage they own;
// lifetime and reentrancy must be checked at the definition. WordToStrAmino2
// takes a caller-supplied buffer Str; the required capacity is not stated here.
const char *WordToStr(unsigned Word, unsigned WordLength, bool Nucleo);
const char *WordToStrNucleo(unsigned Word, unsigned WordLength);
const char *WordToStrAmino(unsigned Word, unsigned WordLength);
const char *WordToStrAmino2(unsigned Word, unsigned WordLength, char *Str);
// Returns true when c is one of the two gap symbols, '-' or '.';
// false for every other byte value.
static inline bool isgap(byte c)
{
	switch (c)
	{
	case '-':
	case '.':
		return true;
	default:
		return false;
	}
}
// NOTE(review): presumably performs one-time set-up of the alphabet tables
// declared in this header; the definition is not visible here -- confirm
// whether it must be called before the tables are used.
void InitAlpha();
// Combines two characters, each either a plain character or a wildcard
// (per the parameter names), into a single byte.
// NOTE(review): presumably returns the IUPAC ambiguity code covering both
// inputs -- confirm at the definition.
byte IUPAC_Pair(byte CharOrWildcard1, byte CharOrWildcard2);
#endif // alpha3_h
muscle-5.1.0/src/assertsameseqs.cpp 0000664 0000000 0000000 00000016304 14244530626 0017355 0 ustar 00root root 0000000 0000000 #include "muscle.h"
// Running count of successful checks in this file: incremented at the end of
// _AssertSameSeqs and _AssertSameSeqsVec, and reported by
// GetAssertSameSeqsOkCount().
static uint g_AssertOkCount = 0;
// Checks that every sequence in MSA1 has a same-labelled counterpart in MSA2
// with identical ungapped characters and an identical GSI. On the first
// mismatch both ungapped sequences are written to the log and Die() is
// called with the caller's location.
//
//   FileName, LineNr  call site reported in the fatal message
//   MSA1              sequences to check (drives the iteration)
//   MSA2              searched by label through GetSeqIndex()
//
// NOTE(review): the index returned by MSA2.GetSeqIndex(Label) is used
// unchecked; what happens for a label that is absent from MSA2 depends on
// GetSeqIndex/GetSequence -- confirm.
void _AssertSeqsEq(const char *FileName, uint LineNr,
  const MultiSequence &MSA1, const MultiSequence &MSA2)
{
	const uint SeqCount1 = MSA1.GetSeqCount();
	for (uint SeqIndex1 = 0; SeqIndex1 < SeqCount1; ++SeqIndex1)
	{
		const Sequence *Seq1 = MSA1.GetSequence((int) SeqIndex1);
		const string &Label = Seq1->m_Label;
		uint SeqIndex2 = MSA2.GetSeqIndex(Label);
		const Sequence *Seq2 = MSA2.GetSequence((int) SeqIndex2);
		uint GSI1 = Seq1->GetGSI();
		uint GSI2 = Seq2->GetGSI();

		// Gap-free copies; released with DeleteSequence() at the end of the
		// iteration.
		Sequence *uSeq1 = Seq1->DeleteGaps();
		Sequence *uSeq2 = Seq2->DeleteGaps();
		int Length1 = uSeq1->GetLength();
		int Length2 = uSeq2->GetLength();

		if (uSeq1->m_CharVec != uSeq2->m_CharVec || GSI1 != GSI2)
		{
			Log("\n");
			Log("AssertSeqsEq >%s\n", Label.c_str());
			Log("GI1 %u, GI2 %u\n", GSI1, GSI2);

			// Dump every character, starting at index 0. The previous loops
			// started at 1 and silently dropped the first residue.
			// NOTE(review): assumes m_CharVec is 0-based and holds
			// GetLength() characters, matching the 0-based GetChar(Pos)
			// loop in _AssertSeqsEqInput -- confirm.
			Log("Seq1[%d] ", Length1);
			for (int i = 0; i < Length1; ++i)
				Log("%c", uSeq1->m_CharVec[i]);
			Log("\n");
			Log("Seq2[%d] ", Length2);
			for (int i = 0; i < Length2; ++i)
				Log("%c", uSeq2->m_CharVec[i]);
			Log("\n");
			Die("AssertSeqsEq %s:%u", FileName, LineNr);
		}
		DeleteSequence(uSeq1);
		DeleteSequence(uSeq2);
	}
}
// Checks every sequence in MS against the global input set returned by
// GetGlobalInputMS(). For each sequence it verifies that
//   - its GSI is below GetGlobalMSSeqCount(),
//   - no other sequence in MS carries the same GSI,
//   - its label equals the label of the input sequence with that GSI,
//   - with gaps removed, it has the same length and the same characters
//     (compared case-insensitively) as that input sequence.
// Any violation ends in Die(), reporting the caller's File and Line.
void _AssertSeqsEqInput(const char *File, uint Line, const MultiSequence &MS)
{
	const MultiSequence &GlobalMS = GetGlobalInputMS();
	const uint GN = GetGlobalMSSeqCount();
	const uint SeqCount = MS.GetSeqCount();
	set GSIs;
	for (uint i = 0; i < SeqCount; ++i)
	{
		const Sequence *Seq = MS.GetSequence(i);
		uint GSI = Seq->GetGSI();
		if (GSI >= GN)
		{
			MS.LogGSIs();
			Die("%s:%u AssertSeqsEqInput GSI1=%u > GN=%u",
			  File, Line, GSI, GN);
		}
		if (GSIs.find(GSI) != GSIs.end())
		{
			MS.LogGSIs();
			Die("%s:%u AssertSeqsEqInput dupe GSI=%u",
			  File, Line, GSI);
		}

		const Sequence *InputSeq = GlobalMS.GetSequence(GSI);
		const string &Label = string(MS.GetLabel(i));
		const string &GlobalLabel = InputSeq->m_Label;
		if (GlobalLabel != Label)
		{
			MS.LogGSIs();
			Die("%s:%u AssertSeqsEqInput Seq(%u) GSI %u label '%s' != '%s'",
			  File, Line, i, GSI, Label.c_str(), GlobalLabel.c_str());
		}
		GSIs.insert(GSI);

		// Gap-free copies of both sequences; released at the end of the
		// iteration.
		const Sequence *UngappedInputSeq = InputSeq->DeleteGaps();
		const uint L = UngappedInputSeq->GetLength();
		const Sequence *UngappedSeq = Seq->DeleteGaps();
		const uint MSL = UngappedSeq->GetLength();
		if (L != MSL)
			Die("%s:%u AssertSeqsEqInput Seq(%u) GSI=%u L=%u, MSL=%u, label=%s",
			  File, Line, i, GSI, L, MSL, Label.c_str());
		for (uint Pos = 0; Pos < L; ++Pos)
		{
			char InputChar = UngappedInputSeq->GetChar(Pos);
			char Char = UngappedSeq->GetChar(Pos);
			// toupper() is only defined for values representable as
			// unsigned char (or EOF); plain char may be negative, so
			// convert before the call.
			if (toupper((unsigned char) InputChar) != toupper((unsigned char) Char))
				Die("%s:%u AssertSeqsEqInput Seq(%u) GSI=%u Pos[%u]=%c,%c label=%s",
				  File, Line, i, GSI, Pos, Char, InputChar, Label.c_str());
		}
		DeleteSequence(UngappedInputSeq);
		DeleteSequence(UngappedSeq);
	}
}
void _AssertSameSeqsVec(const char *File, uint Line,
const MultiSequence &MS, vector &v)
{
MultiSequence *CombinedMS = new MultiSequence;
const uint N = SIZE(v);
for (uint i = 0; i < N; ++i)
{
const MultiSequence *MS = v[i];
const uint n = MS->GetSeqCount();
for (uint j = 0; j < n; ++j)
{
const Sequence *Seq = MS->GetSequence(j);
CombinedMS->AddSequence(Seq, false);
}
}
_AssertSameSeqs(File, Line, MS, *CombinedMS);
++g_AssertOkCount;
delete CombinedMS;
}
void _AssertSameSeqsVec(const char *File, uint Line,
const MultiSequence &MS, vector &v)
{
MultiSequence *CombinedMS = new MultiSequence;
const uint N = SIZE(v);
for (uint i = 0; i < N; ++i)
{
const MultiSequence *MS = v[i];
const uint n = MS->GetSeqCount();
for (uint j = 0; j < n; ++j)
{
const Sequence *Seq = MS->GetSequence(j);
CombinedMS->AddSequence(Seq, false);
}
}
_AssertSameSeqs(File, Line, MS, *CombinedMS);
++g_AssertOkCount;
delete CombinedMS;
}
void _AssertSameSeqsJoin(const char *File, uint Line,
const MultiSequence &MS1, const MultiSequence &MS2, const MultiSequence &MS12)
{
vector v;
v.push_back(&MS1);
v.push_back(&MS2);
_AssertSameSeqsVec(File, Line, MS12, v);
}
// Returns the current value of the file-local success counter
// g_AssertOkCount.
uint GetAssertSameSeqsOkCount()
{
	const uint OkCount = g_AssertOkCount;
	return OkCount;
}
void _AssertSameLabels(const char *File, uint Line, const MultiSequence &MS)
{
const MultiSequence &GlobalMS = GetGlobalInputMS();
const uint GN = GetGlobalMSSeqCount();
const uint SeqCount = MS.GetSeqCount();
set GSIs;
for (uint i = 0; i < SeqCount; ++i)
{
const Sequence *Seq = MS.GetSequence(i);
uint GSI = Seq->GetGSI();
if (GSI >= GN)
{
MS.LogGSIs();
Die("%s:%u AssertSameLabels GSI1=%u > GN=%u",
File, Line, GSI, GN);
}
if (GSIs.find(GSI) != GSIs.end())
{
MS.LogGSIs();
Die("%s:%u AssertSameLabels dupe GSI=%u",
File, Line, GSI);
}
const string &Label = string(MS.GetLabel(i));
const string &GlobalLabel = string(GlobalMS.GetLabel(GSI));
if (GlobalLabel != Label)
{
MS.LogGSIs();
Die("%s:%u AssertSameLabels Seq(%u) GSI %u label '%s' != '%s'",
File, Line, i, GSI, Label.c_str(), GlobalLabel.c_str());
}
GSIs.insert(GSI);
}
}
// Checks that MS1 and MS2 describe the same set of input sequences.
// The function verifies, calling Die() with the caller's File and Line on
// the first failure, that
//   - both hold the same number of sequences,
//   - every GSI is below GetGlobalMSSeqCount(),
//   - no GSI is repeated within MS1 or within MS2,
//   - every label equals the global input label stored under its GSI,
//   - the sets of GSIs found in MS1 and MS2 are identical.
// The order of sequences may differ between MS1 and MS2. On success the
// file-local counter g_AssertOkCount is incremented.
void _AssertSameSeqs(const char *File, uint Line,
  const MultiSequence &MS1, const MultiSequence &MS2)
{
	const MultiSequence &GlobalMS = GetGlobalInputMS();
	const uint GN = GetGlobalMSSeqCount();
	const uint SeqCount = MS1.GetSeqCount();
	const uint SeqCount2 = MS2.GetSeqCount();
	if (SeqCount2 != SeqCount)
		Die("%s:%u AssertSameSeqs N1=%u, N2=%u",
		  File, Line, SeqCount, SeqCount2);

	// GSIs seen so far in each input.
	set GSIs1;
	set GSIs2;
	for (uint i = 0; i < SeqCount; ++i)
	{
		const Sequence *Seq1 = MS1.GetSequence(i);
		const Sequence *Seq2 = MS2.GetSequence(i);
		uint GSI1 = Seq1->GetGSI();
		uint GSI2 = Seq2->GetGSI();
		if (GSI1 >= GN)
			Die("%s:%u AssertSameSeqs GSI1=%u > GN=%u",
			  File, Line, GSI1, GN);
		if (GSI2 >= GN)
			Die("%s:%u AssertSameSeqs GSI2=%u > GN=%u",
			  File, Line, GSI2, GN);
		if (GSIs1.find(GSI1) != GSIs1.end())
		{
			MS1.LogGSIs();
			Die("%s:%u AssertSameSeqs dupe GSI1=%u",
			  File, Line, GSI1);
		}
		if (GSIs2.find(GSI2) != GSIs2.end())
		{
			MS2.LogGSIs();
			// Exactly three arguments for the three conversions; the stray
			// trailing GN that used to be passed here had no matching
			// conversion.
			Die("%s:%u AssertSameSeqs dupe GSI2=%u",
			  File, Line, GSI2);
		}

		const string &Label1 = string(MS1.GetLabel(i));
		const string &Label2 = string(MS2.GetLabel(i));
		const string &GlobalLabel1 = string(GlobalMS.GetLabel(GSI1));
		const string &GlobalLabel2 = string(GlobalMS.GetLabel(GSI2));
		if (GlobalLabel1 != Label1)
			Die("%s:%u AssertSameSeqs Seq1(%u) GI %u label '%s' != '%s'",
			  File, Line, i, GSI1, Label1.c_str(), GlobalLabel1.c_str());
		if (GlobalLabel2 != Label2)
			Die("%s:%u AssertSameSeqs Seq2(%u) GI %u label '%s' != '%s'",
			  File, Line, i, GSI2, Label2.c_str(), GlobalLabel2.c_str());
		GSIs1.insert(GSI1);
		GSIs2.insert(GSI2);
	}

	// Both directions are checked so that the message names the GSI that is
	// missing and the side it is missing from.
	for (set::const_iterator p = GSIs1.begin();
	  p != GSIs1.end(); ++p)
	{
		uint GSI1 = *p;
		if (GSIs2.find(GSI1) == GSIs2.end())
			Die("%s:%u AssertSameSeqs GSI1=%u missing in MS2",
			  File, Line, GSI1);
	}
	for (set::const_iterator p = GSIs2.begin();
	  p != GSIs2.end(); ++p)
	{
		uint GSI2 = *p;
		if (GSIs1.find(GSI2) == GSIs1.end())
			Die("%s:%u AssertSameSeqs GSI2=%u missing in MS1",
			  File, Line, GSI2);
	}
	++g_AssertOkCount;
}
muscle-5.1.0/src/best3.h 0000664 0000000 0000000 00000001301 14244530626 0014766 0 ustar 00root root 0000000 0000000 #pragma once
// Writes the largest of x1, x2 and x3 to *x and the tag (b1, b2 or b3)
// belonging to that value to *b.
// Ties are resolved in favour of the earlier argument: x1 beats x2 and x3,
// and x2 beats x3.
static inline void Best3(float x1, float x2, float x3,
  char b1, char b2, char b3, float *x, char *b)
{
	// Round one: x1 against x2; x1 keeps the lead on a tie.
	float BestValue = x2;
	char BestTag = b2;
	if (x1 >= x2)
	{
		BestValue = x1;
		BestTag = b1;
	}

	// Round two: the leader against x3; the leader keeps the lead on a tie.
	if (!(BestValue >= x3))
	{
		BestValue = x3;
		BestTag = b3;
	}

	*x = BestValue;
	*b = BestTag;
}
// Writes the largest of x1, x2 and x3 to *x.
static inline void Best3(float x1, float x2, float x3, float *x)
{
	// Larger of the first two (x1 on a tie), then compared against x3.
	float BestValue = x2;
	if (x1 >= x2)
		BestValue = x1;
	if (!(BestValue >= x3))
		BestValue = x3;
	*x = BestValue;
}
muscle-5.1.0/src/build_describe.h 0000664 0000000 0000000 00000000035 14244530626 0016710 0 ustar 00root root 0000000 0000000 "
v5.0.1428-14-gc683-dirty
"
muscle-5.1.0/src/build_time.h 0000664 0000000 0000000 00000000027 14244530626 0016067 0 ustar 00root root 0000000 0000000 __DATE__ " " __TIME__
muscle-5.1.0/src/buildposterior3flat.cpp 0000664 0000000 0000000 00000005341 14244530626 0020311 0 ustar 00root root 0000000 0000000 #include "muscle.h"
// Builds a posterior probability matrix needed to align a pair
// of alignments. Mathematically, the returned matrix M is
// defined as follows:
// M[SeqIndex1,SeqIndex2] = sum sum f(s,t,SeqIndex1,SeqIndex2)
// s in align1 t in align2
// where
// [ P(s[SeqIndex1] <--> t[SeqIndex2])
// [ if s[SeqIndex1] is a letter in the ith column of align1 and
// [ t[SeqIndex2] it a letter in the jth column of align2
// f(s,t,SeqIndex1,SeqIndex2) = [
// [ 0 otherwise
//
// This is a variant of BuildPosterior() where sparse posterior matrices
// contain all pairs with one sequence from MSA1 and the other from MSA2,
// rather than all pairs in the union as in CalcPostFlat.
void CalcPosteriorFlat3(const MultiSequence &MSA1,
const MultiSequence &MSA2,
const vector &SeqIndexes1,
const vector &SeqIndexes2,
const vector &SparseMxs,
float *Flat)
{
const uint SeqCount1 = MSA1.GetSeqCount();
const uint SeqCount2 = MSA1.GetSeqCount();
const uint ColCount1 = MSA1.GetColCount();
const uint ColCount2 = MSA2.GetColCount();
const uint FlatSize = ColCount1*ColCount2;
for (uint i = 0; i < FlatSize; ++i)
Flat[i] = 0;
vector PosToCol1;
vector PosToCol2;
// May be subset of all pairs due to sampling
const uint PairCount = SIZE(SparseMxs);
for (uint PairIndex = 0; PairIndex < PairCount; ++PairIndex)
{
uint SeqIndex1 = SeqIndexes1[PairIndex];
uint SeqIndex2 = SeqIndexes2[PairIndex];
const Sequence *Seq1 = MSA1.GetSequence(SeqIndex1);
const Sequence *Seq2 = MSA2.GetSequence(SeqIndex2);
const uint ColCountSeq1 = Seq1->GetLength();
const uint ColCountSeq2 = Seq2->GetLength();
asserta(ColCountSeq1 == ColCount1);
asserta(ColCountSeq2 == ColCount2);
const MySparseMx &PostMx12 = *SparseMxs[PairIndex];
const uint L1 = PostMx12.GetLX();
const uint L2 = PostMx12.GetLY();
Seq1->GetPosToCol(PosToCol1);
Seq2->GetPosToCol(PosToCol2);
asserta(SIZE(PosToCol1) == L1);
asserta(SIZE(PosToCol2) == L2);
for (uint Pos1 = 0; Pos1 < L1; ++Pos1)
{
uint Offset = PostMx12.GetOffset(Pos1);
uint RowSize = PostMx12.GetSize(Pos1);
assert(Pos1 < SIZE(PosToCol1));
uint Col1 = PosToCol1[Pos1];
uint FlatBase = Col1 * ColCount2;
for (uint k = 0; k < RowSize; ++k)
{
float Prob = PostMx12.GetProb_Offset(Offset + k);
uint Pos2 = PostMx12.GetCol_Offset(Offset + k);
assert(Pos2 < SIZE(PosToCol2));
uint Col2 = PosToCol2[Pos2];
uint FlatOffset = FlatBase + Col2;
assert(FlatOffset < FlatSize);
Flat[FlatOffset] += Prob;
}
}
}
}
muscle-5.1.0/src/buildpostflat.cpp 0000664 0000000 0000000 00000005244 14244530626 0017167 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "mpcflat.h"
#define TRACE 0
// Builds a posterior probability matrix needed to align a pair
// of alignments. Mathematically, the returned matrix M is
// defined as follows:
// M[i,j] = sum sum f(s,t,i,j)
// s in align1 t in align2
// where
// [ P(s[i] <--> t[j])
// [ if s[i] is a letter in the ith column of align1 and
// [ t[j] is a letter in the jth column of align2
// f(s,t,i,j) = [
// [ 0 otherwise
//
void MPCFlat::BuildPost(const MultiSequence &MSA1, const MultiSequence &MSA2,
float *Post)
{
const uint SeqCount1 = MSA1.GetSeqCount();
const uint SeqCount2 = MSA2.GetSeqCount();
const uint ColCount1 = MSA1.GetColCount();
const uint ColCount2 = MSA2.GetColCount();
uint Ix = 0;
for (uint i = 0; i < ColCount1; ++i)
for (uint j = 0; j < ColCount2; ++j)
Post[Ix++] = 0;
// for each s in MSA1
vector PosToCol1;
vector PosToCol2;
for (uint SeqIndex1 = 0; SeqIndex1 < SeqCount1; ++SeqIndex1)
{
const Sequence *Seq1 = MSA1.GetSequence(SeqIndex1);
uint SMI_1 = Seq1->GetSMI();
asserta(SMI_1 != UINT_MAX);
Seq1->GetPosToCol(PosToCol1);
// for each t in MSA2
for (uint SeqIndex2 = 0; SeqIndex2 < SeqCount2; SeqIndex2++)
{
const Sequence *Seq2 = MSA2.GetSequence(SeqIndex2);
uint SMI_2 = Seq2->GetSMI();
asserta(SMI_2 != UINT_MAX);
asserta(SMI_1 != SMI_2);
Seq2->GetPosToCol(PosToCol2);
if (SMI_1 < SMI_2)
{
uint PairIndex = GetPairIndex(SMI_1, SMI_2);
const MySparseMx &Mx = GetSparsePost(PairIndex);
const uint LX = Mx.GetLX();
const uint LY = Mx.GetLY();
for (uint i = 0; i < LX; ++i)
{
uint Col1 = PosToCol1[i];
uint Offset = Mx.GetOffset(i);
uint Size = Mx.GetSize(i);
for (uint k = 0; k < Size; ++k)
{
float P = Mx.GetProb_Offset(Offset);
uint j = Mx.GetCol_Offset(Offset);
++Offset;
uint Col2 = PosToCol2[j];
Post[Col1*ColCount2 + Col2] += P;
}
}
}
else
{
uint PairIndex = GetPairIndex(SMI_2, SMI_1);
const MySparseMx &Mx = GetSparsePost(PairIndex);
const uint LX = Mx.GetLX();
const uint LY = Mx.GetLY();
for (uint i = 0; i < LX; ++i)
{
uint Col2 = PosToCol2[i];
uint Offset = Mx.GetOffset(i);
uint Size = Mx.GetSize(i);
for (uint k = 0; k < Size; ++k)
{
float P = Mx.GetProb_Offset(Offset);
uint j = Mx.GetCol_Offset(Offset);
++Offset;
uint Col1 = PosToCol1[j];
Post[Col1*ColCount2 + Col2] += P;
}
}
}
}
}
#if 0//TRACE
LogFlatMx("MSAPost", Post, ColCount1, ColCount2);
#endif
}
muscle-5.1.0/src/bwdflat3.cpp 0000664 0000000 0000000 00000010560 14244530626 0016016 0 ustar 00root root 0000000 0000000 #include "muscle.h"
/***
Bwd[s][i][j] =
probability of starting in state s and aligning
last (LX-i) letters of X to
last (LY-j) letters of Y.

Flat holds the (LX+1) x (LY+1) x HMMSTATE_COUNT table with the state index
varying fastest:
	Flat[HMMSTATE_COUNT*(i*(LY+1) + j) + s] = Bwd[s][i][j]
so the caller must supply at least HMMSTATE_COUNT*(LX+1)*(LY+1) floats.
Values are combined with LOG_ADD and seeded with LOG_ZERO, so they are
presumably stored as log-probabilities -- TODO confirm against hmmscores.h.
***/
void CalcBwdFlat(const byte *X, uint LX, const byte *Y, uint LY, float *Flat)
{
// Textual include inside the function body; presumably supplies the t*
// transition scores, InsScore[] and MatchScore[][] used below -- TODO confirm.
#include "hmmscores.h"
const int iLX = int(LX);
const int iLY = int(LY);
const int LY1 = LY+1;
// Flat-index distance between vertically / horizontally adjacent cells.
const int BaseInc_i = HMMSTATE_COUNT*LY1;
const int BaseInc_j = HMMSTATE_COUNT;
// Last column (j = LY), rows 0..LX-1: no letter of Y remains, so the
// Y-emitting states are impossible. The main loop never writes IY/JY in
// these cells, so the values set here are final.
uint Base = HMMSTATE_COUNT*(0*(LY1) + LY);
for (int i = 0; i < iLX; ++i)
{
Flat[Base + HMMSTATE_IY] = LOG_ZERO;
Flat[Base + HMMSTATE_JY] = LOG_ZERO;
Base += BaseInc_i;
}
// Last row (i = LX), columns 0..LY-1: no letter of X remains, so the
// X-emitting states are impossible. Likewise left untouched by the main loop.
Base = HMMSTATE_COUNT*(LX*(LY1) + 0);
for (int j = 0; j < iLY; ++j)
{
Flat[Base + HMMSTATE_IX] = LOG_ZERO;
Flat[Base + HMMSTATE_JX] = LOG_ZERO;
Base += BaseInc_j;
}
// Running flat offsets of cell (i,j) and its three successors
// (i+1,j), (i,j+1), (i+1,j+1). They start at the bottom-right cell and are
// stepped back by one cell after every inner iteration. Because a row holds
// LY+1 cells, finishing a row leaves them at the last cell of the row above.
// Successor offsets that fall outside the table (i = LX or j = LY) are
// computed but not dereferenced by the branches below.
int Base_i_j = (int) HMMSTATE_COUNT*(LX*(LY1) + LY);
int Base_i1_j = Base_i_j + BaseInc_i;
int Base_i_j1 = Base_i_j + BaseInc_j;
int Base_i1_j1 = Base_i_j + BaseInc_i + BaseInc_j;
for (int i = iLX; i >= 0; --i)
{
// Letter emitted when stepping from row i to i+1; 0 is a placeholder for
// the row past the end of X.
// NOTE(review): x is a plain char used as an array index -- assumes letter
// codes in X are non-negative when stored in char; confirm.
char x = (i == iLX ? 0 : X[i]);
float Emit_x = InsScore[x];
for (int j = iLY; j >= 0; --j)
{
if (i == LX && j == LY)
{
// Special case for end-of-alignment
Flat[Base_i_j + HMMSTATE_M] = tSM;
Flat[Base_i_j + HMMSTATE_IX] = tSI;
Flat[Base_i_j + HMMSTATE_IY] = tSI;
Flat[Base_i_j + HMMSTATE_JX] = tSJ;
Flat[Base_i_j + HMMSTATE_JY] = tSJ;
Base_i_j -= BaseInc_j;
Base_i1_j -= BaseInc_j;
Base_i_j1 -= BaseInc_j;
Base_i1_j1 -= BaseInc_j;
continue;
}
// Letter emitted when stepping from column j to j+1; 0 is a placeholder
// for the column past the end of Y.
char y = (j == iLY ? 0 : Y[j]);
float Emit_y = InsScore[y];
float Emit_xy = MatchScore[x][y];
// Interior cell: all three successors exist.
if (i < iLX && j < iLY)
{
// Score of entering each successor state, including its emission.
float NextM = Flat[Base_i1_j1 + HMMSTATE_M] + Emit_xy;
float NextIX = Flat[Base_i1_j + HMMSTATE_IX] + Emit_x;
float NextJX = Flat[Base_i1_j + HMMSTATE_JX] + Emit_x;
float NextIY = Flat[Base_i_j1 + HMMSTATE_IY] + Emit_y;
float NextJY = Flat[Base_i_j1 + HMMSTATE_JY] + Emit_y;
// M is only given a value when both i > 0 and j > 0; otherwise LOG_ZERO.
if (i > 0 && j > 0)
{
float M_M = tMM + NextM;
float M_IX = tMI + NextIX;
float M_JX = tMJ + NextJX;
float M_IY = tMI + NextIY;
float M_JY = tMJ + NextJY;
Flat[Base_i_j + HMMSTATE_M] = LOG_ADD(M_M, M_IX, M_JX, M_IY, M_JY);
}
else
Flat[Base_i_j + HMMSTATE_M] = LOG_ZERO;
// IX/JX are only given a value when i > 0; each may stay in itself or
// return to M.
if (i > 0)
{
float IX_IX = tII + NextIX;
float IX_M = tIM + NextM;
Flat[Base_i_j + HMMSTATE_IX] = LOG_ADD(IX_IX, IX_M);
float JX_JX = tJJ + NextJX;
float JX_M = tJM + NextM;
Flat[Base_i_j + HMMSTATE_JX] = LOG_ADD(JX_JX, JX_M);
}
else
{
Flat[Base_i_j + HMMSTATE_IX] = LOG_ZERO;
Flat[Base_i_j + HMMSTATE_JX] = LOG_ZERO;
}
// IY/JY are only given a value when j > 0; same pattern as IX/JX.
if (j > 0)
{
float IY_IY = tII + NextIY;
float IY_M = tIM + NextM;
Flat[Base_i_j + HMMSTATE_IY] = LOG_ADD(IY_IY, IY_M);
float JY_JY = tJJ + NextJY;
float JY_M = tJM + NextM;
Flat[Base_i_j + HMMSTATE_JY] = LOG_ADD(JY_JY, JY_M);
}
else
{
Flat[Base_i_j + HMMSTATE_IY] = LOG_ZERO;
Flat[Base_i_j + HMMSTATE_JY] = LOG_ZERO;
}
Base_i_j -= BaseInc_j;
Base_i1_j -= BaseInc_j;
Base_i_j1 -= BaseInc_j;
Base_i1_j1 -= BaseInc_j;
continue;
}
// Last column (j = LY) above the bottom row: only steps that consume a
// letter of X remain, so only the (i+1,j) successor is read.
if (i < iLX)
{
assert(j == iLY);
if (i > 0)
{
float NextIX = Flat[Base_i1_j + HMMSTATE_IX] + Emit_x;
float NextJX = Flat[Base_i1_j + HMMSTATE_JX] + Emit_x;
float M_IX = tMI + NextIX;
float M_JX = tMJ + NextJX;
Flat[Base_i_j + HMMSTATE_M] = LOG_ADD(M_IX, M_JX);
Flat[Base_i_j + HMMSTATE_IX] = tII + NextIX;
Flat[Base_i_j + HMMSTATE_JX] = tJJ + NextJX;
}
else
{
Flat[Base_i_j + HMMSTATE_M] = LOG_ZERO;
Flat[Base_i_j + HMMSTATE_IX] = LOG_ZERO;
Flat[Base_i_j + HMMSTATE_JX] = LOG_ZERO;
}
}
// Last row (i = LX) left of the final column: only steps that consume a
// letter of Y remain, so only the (i,j+1) successor is read.
if (j < iLY)
{
assert(i == iLX);
float NextIY = Flat[Base_i_j1 + HMMSTATE_IY] + Emit_y;
float NextJY = Flat[Base_i_j1 + HMMSTATE_JY] + Emit_y;
float M_IY = tMI + NextIY;
float M_JY = tMJ + NextJY;
if (j > 0)
{
Flat[Base_i_j + HMMSTATE_M] = LOG_ADD(M_IY, M_JY);
Flat[Base_i_j + HMMSTATE_IY] = tII + NextIY;
Flat[Base_i_j + HMMSTATE_JY] = tJJ + NextJY;
}
else
{
Flat[Base_i_j + HMMSTATE_M] = LOG_ZERO;
Flat[Base_i_j + HMMSTATE_IY] = LOG_ZERO;
Flat[Base_i_j + HMMSTATE_JY] = LOG_ZERO;
}
}
// Step all four offsets one cell to the left.
Base_i_j -= BaseInc_j;
Base_i1_j -= BaseInc_j;
Base_i_j1 -= BaseInc_j;
Base_i1_j1 -= BaseInc_j;
}
}
}
muscle-5.1.0/src/calcalnflat.cpp 0000664 0000000 0000000 00000001547 14244530626 0016561 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "best3.h"
void TraceBackFlat(const char *TB, uint LX, uint LY, string &Path);
// Finds the highest-scoring global path through a flat posterior matrix by
// dynamic programming and records the trace-back.
//   Post    LX x LY scores, row-major; read sequentially, one value per
//           interior cell (i,j) with 1 <= i <= LX, 1 <= j <= LY.
//   DPRows  scratch space for two rows of LY+1 floats each.
//   TB      receives (LX+1)*(LY+1) trace-back codes, row-major:
//             'B'  cell reached diagonally from (i-1,j-1), adding Post
//             'X'  cell reached from (i-1,j)
//             'Y'  cell reached from (i,j-1)
//           Row 0 is all 'Y' and column 0 of every later row is 'X'.
//   Path    cleared, then filled in by TraceBackFlat().
// Returns the score of cell (LX,LY). Row 0 and column 0 score 0, and the 'X'
// and 'Y' moves add nothing, so only diagonal steps contribute.
// Ties are broken by Best3: 'B' over 'X' over 'Y'.
float CalcAlnFlat(const float *Post, uint LX, uint LY,
  float *DPRows, char *TB, string &Path)
	{
	Path.clear();

	float *OldRow = DPRows;
	float *NewRow = DPRows + (LY+1);
	char *TBPtr = TB;

	// Row 0.
	for (uint j = 0; j <= LY; ++j)
		{
		OldRow[j] = 0;
		*TBPtr++ = 'Y';
		}

	const float *PostPtr = Post;
	for (uint i = 1; i <= LX; ++i)
		{
		// Column 0.
		*TBPtr++ = 'X';
		NewRow[0] = 0;
		for (uint j = 1; j <= LY; ++j)
			{
			float B = OldRow[j-1] + *PostPtr++;
			float X = OldRow[j];
			float Y = NewRow[j-1];
			float Best;
			char TBChar;
			Best3(B, X, Y, 'B', 'X', 'Y', &Best, &TBChar);
			NewRow[j] = Best;
			*TBPtr++ = TBChar;
			}
		// The row just filled becomes the "previous" row.
		swap(OldRow, NewRow);
		}

	float Score = OldRow[LY];
	TraceBackFlat(TB, LX, LY, Path);
	return Score;
	}
muscle-5.1.0/src/calcalnscoreflat.cpp 0000664 0000000 0000000 00000001113 14244530626 0017602 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "best3.h"
// Score-only version of the flat alignment DP: same recurrence as
// CalcAlnFlat but without a trace-back, so a single row of LY+1 floats
// (DPRows) is enough. Post is read sequentially as an LX x LY row-major
// matrix. Returns the value of the bottom-right cell.
float CalcAlnScoreFlat(const float *Post, uint LX, uint LY, float *DPRows)
	{
	float *Row = DPRows;
	for (uint j = 0; j <= LY; ++j)
		Row[j] = 0;

	const float *PostPtr = Post;
	for (uint i = 1; i <= LX; ++i)
		{
		// Left: current row, column j-1. Diag: previous row, column j-1.
		float Left = 0;
		float Diag = Row[0];
		Row[0] = 0;
		for (uint j = 1; j <= LY; ++j)
			{
			// Row[j] still holds the previous row's value at this point.
			const float Up = Row[j];
			const float Match = Diag + *PostPtr++;
			Diag = Up;
			Best3(Match, Up, Left, &Left);
			Row[j] = Left;
			}
		}
	return Row[LY];
	}
muscle-5.1.0/src/calcalnscoresparse.cpp 0000664 0000000 0000000 00000000663 14244530626 0020162 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "best3.h"
float CalcAlnScoreSparse(const MySparseMx &Mx)
{
uint LX = Mx.m_LX;
uint LY = Mx.m_LY;
#if 0
float Sum = 0;
for (uint i = 0; i < Mx.m_LX; ++i)
Sum += Mx.GetMaxProbRow(i);
return Sum;
#endif
float *Post = AllocPost(LX, LY);
Mx.ToPost(Post);
float *DPRows = AllocDPRows(LX, LY);
float Score = CalcAlnScoreFlat(Post, LX, LY, DPRows);
myfree(Post);
myfree(DPRows);
return Score;
}
muscle-5.1.0/src/calcposteriorflat.cpp 0000664 0000000 0000000 00000004214 14244530626 0020027 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "mpcflat.h"
// Converts forward and backward tables into a flat LX x LY posterior matrix.
// For every cell (i,j) with 1 <= i <= LX and 1 <= j <= LY the score
//     FlatFwd[cell] + FlatBwd[cell] - Total
// is read at the first state slot of the cell (the original comment labels
// this slot M). Scores below MIN_SPARSE_SCORE become 0, scores at or above
// LOG_ONE become 1, everything else is exponentiated.
// Post is written row-major, one float per cell.
void CalcPostFlat(const float *FlatFwd, const float *FlatBwd,
  uint LX, uint LY, float *Post)
	{
	const float Total = CalcTotalProbFlat(FlatFwd, FlatBwd, LX, LY);

	// A row of the forward/backward tables has LY+1 cells.
	const uint RowStride = HMMSTATE_COUNT*(LY + 1);
	float *Out = Post;
	for (uint i = 1; i <= LX; ++i)
		{
		// Skip column 0 of row i.
		uint IxFB = i*RowStride + HMMSTATE_COUNT;
		for (uint j = 1; j <= LY; ++j, IxFB += HMMSTATE_COUNT)
			{
			const float Score = FlatFwd[IxFB] + FlatBwd[IxFB] - Total;
			float P = 0;
			if (!(Score < MIN_SPARSE_SCORE))
				P = (Score >= LOG_ONE ? 1.0f : expf(Score));
			*Out++ = P;
			}
		}
	}
// Computes the pairwise posterior for one sequence pair.
// Steps: forward and backward tables -> flat posterior -> sparse posterior
// stored in the slot returned by GetSparsePost(PairIndex). The flat posterior
// is also scored with CalcAlnScoreFlat; that score divided by the shorter
// sequence length is written symmetrically into m_DistMx.
//
// The temporary buffers are released only after the disabled TRACE blocks
// that read them, so enabling a trace does not touch freed memory.
//
// NOTE(review): if either sequence has length 0, min(LX, LY) is 0 and the
// value stored in m_DistMx is not finite -- confirm callers exclude this.
void MPCFlat::CalcPosterior(uint PairIndex)
	{
	const pair &Pair = GetPair(PairIndex);
	const uint SeqIndexX = Pair.first;
	const uint SeqIndexY = Pair.second;

	uint LX = GetL(SeqIndexX);
	uint LY = GetL(SeqIndexY);

	float *Fwd = AllocFB(LX, LY);
	float *Bwd = AllocFB(LX, LY);

	const byte *X = GetBytePtr(SeqIndexX);
	const byte *Y = GetBytePtr(SeqIndexY);

	CalcFwdFlat(X, LX, Y, LY, Fwd);
	CalcBwdFlat(X, LX, Y, LY, Bwd);

	float *Post = AllocPost(LX, LY);
	CalcPostFlat(Fwd, Bwd, LX, LY, Post);
#if 0//TRACE
	LogFlatMxs("FwdFlat", Fwd, LX, LY);
	LogFlatMxs("BwdFlat", Bwd, LX, LY);
	LogFlatMx("PostFlat", Post, LX, LY);
#endif
#if 0//TRACE
	LogFlatMx1("Fwd", Fwd, LX, LY);
	LogFlatMx1("Bwd", Bwd, LX, LY);
	LogFlatMx("Post", Post, LX, LY);
#endif
	// Forward/backward tables are no longer needed once Post exists.
	myfree(Fwd);
	myfree(Bwd);

	MySparseMx &SparsePost = GetSparsePost(PairIndex);
	SparsePost.FromPost(Post, LX, LY);
	SparsePost.m_X = X;
	SparsePost.m_Y = Y;
#if 0//TRACE
	SparsePost.LogMe();
#endif

	float *DPRows = AllocDPRows(LX, LY);
	float Score = CalcAlnScoreFlat(Post, LX, LY, DPRows);
#if 0//TRACE
	string Path;
	char *TB = myalloc(char, (LX+1)*(LY+1));
	float Score2 = CalcAlnFlat(Post, LX, LY, DPRows, TB, Path);
	Log("Score=%.3g Score2=%.3g\n", Score, Score2);
	myfree(TB);
#endif
	myfree(Post);
	myfree(DPRows);

	float EA = Score/min(LX, LY);
#if 0//TRACE
	const char *LabelX = GetLabel(SeqIndexX);
	const char *LabelY = GetLabel(SeqIndexY);
	Log("Flat EA(%s, %s) = %.3g\n", LabelX, LabelY, EA);
#endif
	m_DistMx[SeqIndexX][SeqIndexY] = EA;
	m_DistMx[SeqIndexY][SeqIndexX] = EA;
	}
muscle-5.1.0/src/chainer.h 0000664 0000000 0000000 00000002653 14244530626 0015372 0 ustar 00root root 0000000 0000000 #pragma once
#include "hspfinder.h"
#include
const float BAD_SCORE = -9e9f;
struct HSPData;
// Bendpoint
// One bendpoint record: a position, a lo/hi flag and an index.
// NOTE(review): presumably Pos is a sequence coordinate, IsLo marks the low
// versus high end of an HSP and Index identifies that HSP -- confirm against
// Chainer::SetBPs().
struct BPData
{
uint Pos;
bool IsLo;
uint Index;
// Writes "BPlo" or "BPhi" followed by Pos and Index to the log.
// No trailing newline is emitted.
void LogMe() const
{
Log("BP%s Pos %u Ix %u", (IsLo ? "lo" : "hi"), Pos, Index);
}
};
// Summary record for one chain.
// NOTE(review): field meanings are inferred from names only -- presumably the
// index of the chain's last HSP, the high coordinates reached on the two
// sequences (A and B), and the accumulated chain score; confirm in the
// Chainer implementation.
struct ChainData
{
uint LastHSPIndex;
uint Ahi;
uint Bhi;
float Score;
};
class Chainer
{
public:
const vector *m_HSPs;
BPData *m_BPs;
uint *m_PrevHSPIndexes; // Predecessor in chain
float *m_HSPIndexToChainScore;
list m_Chains; // Live HSP indexes
public:
Chainer()
{
m_HSPs = 0;
m_BPs = 0;
m_PrevHSPIndexes = 0;
m_HSPIndexToChainScore = 0;
}
~Chainer()
{
Clear();
}
void Clear()
{
m_HSPs = 0;
m_Chains.clear();
myfree(m_BPs);
myfree(m_PrevHSPIndexes);
myfree(m_HSPIndexToChainScore);
m_BPs = 0;
m_PrevHSPIndexes = 0;
m_HSPIndexToChainScore = 0;
}
void Run(const vector &HSPs,
vector &Chain);
void LogMe() const;
void LogBPs() const;
public:
static void LogHSPs(const vector &HSPs);
static bool IsValidChain(const vector &HSPs);
static void AssertValidChain(const vector &HSPs);
static void LogChain(const vector &HSPs,
bool TestValid);
static float GetChainScore(HSPData **HSPs, uint HSPCount);
private:
void SetBPs();
void SortBPs();
uint FindBestChainLT(uint Ahi, uint Bhi);
};
muscle-5.1.0/src/cmds.h 0000664 0000000 0000000 00000001267 14244530626 0014707 0 ustar 00root root 0000000 0000000 #ifndef C
#error "C not defined"
#endif
C(align)
C(upgma5)
C(msastats)
C(pprog)
C(pprog2)
C(pprogt)
C(strip_gappy_cols)
C(strip_gappy_rows)
C(guide_tree_join_order)
C(eadistmx)
C(eadistmx_msas)
C(split_tree)
C(tree_subset_nodes)
C(consseq)
C(super4)
C(usorter)
C(permute_tree)
C(divide_tree)
C(qscore)
C(qscore2)
C(qscoredir)
C(eacluster)
C(derep)
C(uclust)
C(super5)
C(transaln)
C(hmmdump)
C(perturbhmm)
C(testlog)
C(resample)
C(disperse)
C(efastats)
C(fa2efa)
C(colscore_efa)
C(qscore_efa)
C(efa_bestconf)
C(efa_bestcols)
C(trimtoref)
C(trimtoref_efa)
C(efa_explode)
C(relabel)
C(addconfseq)
C(labels2randomchaintree)
C(maxcc)
C(letterconf)
C(letterconf_html)
C(make_a2m)
C(eesort)
#undef C
muscle-5.1.0/src/colscoreefa.cpp 0000664 0000000 0000000 00000004713 14244530626 0016600 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "ensemble.h"
static const uint MAXBIN = 10;
// Maps a confidence in (0, 1] to a histogram bin.
// Conf == 1 gets the dedicated top bin MAXBIN; any other value falls into one
// of MAXBIN equal-width bins 0..MAXBIN-1. Dies if Conf is outside (0, 1].
static uint GetBin(double Conf)
	{
	asserta(Conf > 0 && Conf <= 1);
	if (Conf == 1)
		return MAXBIN;
	// Scale by the bin count rather than a literal so the two cannot drift.
	uint Bin = uint(Conf*MAXBIN);
	asserta(Bin < MAXBIN);
	return Bin;
	}
// Command "colscore_efa": scores the columns of every MSA in an ensemble
// against a reference alignment.
// Options read: colscore_efa (ensemble file), ref (reference FASTA),
// output (report file), max_gap_fract (optional, default 0.5).
// Report lines written to the output file:
//   meantc <mean TC over all MSAs>
//   bin <bin> <columns in bin> <correct columns in bin> <fraction correct>
// The mean TC and the per-bin fractions are also sent to ProgressLog.
void cmd_colscore_efa()
{
const string EfaFileName = opt(colscore_efa);
const string RefFileName = opt(ref);
const string OutputFileName = opt(output);
double MaxGapFract = 0.5;
if (optset_max_gap_fract)
MaxGapFract = opt(max_gap_fract);
Ensemble E;
E.FromFile(EfaFileName);
MSA Ref;
Ref.FromFASTAFile_PreserveCase(RefFileName);
FILE *fOut = CreateStdioFile(OutputFileName);
const uint MSACount = E.GetMSACount();
E.SortMSA(Ref);
// Identifiers of the reference columns; a test column counts as correct
// when its identifier is found in this set.
set RefUniqueIxs;
E.GetRefUniqueIxs(Ref, RefUniqueIxs, MaxGapFract);
// NOTE(review): RefIxCount is not used below.
const uint RefIxCount = SIZE(RefUniqueIxs);
// Denominator for TC: number of reference columns for which
// ColIsUpper(col, MaxGapFract) is true.
uint RefUpperColCount = 0;
const uint RefColCount = Ref.GetColCount();
for (uint RefColIndex = 0; RefColIndex < RefColCount; ++RefColIndex)
if (Ref.ColIsUpper(RefColIndex, MaxGapFract))
++RefUpperColCount;
set > RefPosSet;
E.GetRefPosSet(Ref, MaxGapFract, RefPosSet);
// Histograms over confidence bins 0..MAXBIN, accumulated across all MSAs.
vector BinToCount(MAXBIN+1);
vector BinToCorrectCount(MAXBIN+1);
double SumTC = 0;
for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
{
// Parallel arrays: column identifier and its confidence.
vector TestUniqueIxs;
vector Confs;
E.GetTestUniqueIxs(MSAIndex, RefPosSet, TestUniqueIxs, Confs);
const uint TestIxCount = SIZE(TestUniqueIxs);
uint CorrectCount = 0;
for (uint i = 0; i < TestIxCount; ++i)
{
uint TestUniqueIx = TestUniqueIxs[i];
double Conf = Confs[i];
uint Bin = GetBin(Conf);
asserta(Bin <= MAXBIN);
++BinToCount[Bin];
bool Correct =
(RefUniqueIxs.find(TestUniqueIx) != RefUniqueIxs.end());
if (Correct)
{
++CorrectCount;
++BinToCorrectCount[Bin];
}
// Pf(fOut, "col %c %.4f\n", tof(Correct), Conf);
}
// NOTE(review): if RefUpperColCount is 0 this division is by zero and TC
// is not finite -- confirm inputs rule this out.
double TC = double(CorrectCount)/RefUpperColCount;
SumTC += TC;
//Pf(fOut, "tc %u %.4f\n", MSAIndex, TC);
}
// NOTE(review): MSACount == 0 likewise gives a non-finite mean.
double MeanTC = SumTC/MSACount;
Pf(fOut, "meantc %.4f\n", MeanTC);
ProgressLog("Mean TC %.4f\n", MeanTC);
ProgressLog("Bins ");
for (uint Bin = 0; Bin <= MAXBIN; ++Bin)
{
uint Count = BinToCount[Bin];
uint CorrectCount = BinToCorrectCount[Bin];
asserta(CorrectCount <= Count);
// Empty bins report a fraction of 0.
double P = 0;
if (Count > 0)
P = double(CorrectCount)/Count;
Pf(fOut, "bin %u %u %u %.4f\n",
Bin, Count, CorrectCount, P);
ProgressLog(" %.2f", P);
}
ProgressLog("\n");
CloseStdioFile(fOut);
}
muscle-5.1.0/src/consflat.cpp 0000664 0000000 0000000 00000001060 14244530626 0016114 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "mpcflat.h"
#include "locallock.h"
// Runs one consistency iteration: ConsPair() is applied to every pair in
// parallel, then the current and updated sparse-posterior tables are swapped.
// Iter is zero-based and is shown one-based in the progress message.
void MPCFlat::ConsIter(uint Iter)
	{
	const uint PairCount = SIZE(m_Pairs);
	asserta(PairCount > 0);

	const unsigned ThreadCount = GetRequestedThreadCount();
	const int LoopEnd = (int) PairCount;
	const uint IterNumber = Iter+1;

	// Shared progress counter; only touched between Lock() and Unlock().
	uint DoneCount = 0;
#pragma omp parallel for num_threads(ThreadCount)
	for (int PairIndex = 0; PairIndex < LoopEnd; ++PairIndex)
		{
		Lock();
		ProgressStep(DoneCount, PairCount, "Consistency (%u/%u)",
		  IterNumber, m_ConsistencyIterCount);
		++DoneCount;
		Unlock();

		ConsPair(PairIndex);
		}

	swap(m_ptrSparsePosts, m_ptrUpdatedSparsePosts);
	}
muscle-5.1.0/src/conspairflat.cpp 0000664 0000000 0000000 00000005427 14244530626 0017003 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "mpcflat.h"
#if 0//TRACE
const byte *g_X;
const byte *g_Y;
const byte *g_Z;
#endif
// Consistency update for one pair (X,Y).
// The pair's sparse posterior is expanded to a flat LX x LY matrix and
// doubled (the Z=X and Z=Y terms). Every other sequence Z then contributes
// through one of the three RelaxFlat_* routines, chosen by where Z's index
// falls relative to X and Y, because pairs are looked up with the smaller
// sequence index first. The result is written to the pair's slot in the
// updated sparse-posterior table; the current table is only read.
void MPCFlat::ConsPair(uint PairIndex)
	{
	const pair &Pair = GetPair(PairIndex);
	uint SeqIndexX = Pair.first;
	uint SeqIndexY = Pair.second;

	const MySparseMx &SparsePostXY = GetSparsePost(PairIndex);
	uint LX = GetSeqLength(SeqIndexX);
	uint LY = GetSeqLength(SeqIndexY);
	asserta(SparsePostXY.GetLX() == LX);
	asserta(SparsePostXY.GetLY() == LY);

	float *Post = AllocPost(LX, LY);
	SparsePostXY.ToPost(Post);

	// Account for Z=X and Z=Y (hence the factor 2)
	const uint CellCount = LX*LY;
	for (uint Cell = 0; Cell < CellCount; ++Cell)
		Post[Cell] *= 2;
#if 0//TRACE
	LogFlatMx("ConsPair Z=X Z=Y", Post, LX, LY);
#endif

	const uint SeqCount = GetSeqCount();
	asserta(SeqIndexX < SeqIndexY); // because convention for pairs
	for (uint SeqIndexZ = 0; SeqIndexZ < SeqCount; ++SeqIndexZ)
		{
		if (SeqIndexZ == SeqIndexX || SeqIndexZ == SeqIndexY)
			continue;
#if 0//TRACE
		g_X = GetSequence(SeqIndexX)->GetBytePtr();
		g_Y = GetSequence(SeqIndexY)->GetBytePtr();
		g_Z = GetSequence(SeqIndexZ)->GetBytePtr();
#endif
		// Z differs from both X and Y here, so "not before" means "after".
		const bool ZBeforeX = (SeqIndexZ < SeqIndexX);
		const bool ZBeforeY = (SeqIndexZ < SeqIndexY);
		if (ZBeforeX)
			{
			// Z < X < Y
			asserta(ZBeforeY); // because SeqIndexX < SeqIndexY
			uint PairIndexZX = GetPairIndex(SeqIndexZ, SeqIndexX);
			uint PairIndexZY = GetPairIndex(SeqIndexZ, SeqIndexY);
			const MySparseMx &ZX = GetSparsePost(PairIndexZX);
			const MySparseMx &ZY = GetSparsePost(PairIndexZY);
			RelaxFlat_ZX_ZY(ZX, ZY, Post);
#if 0//TRACE
			LogFlatMx("ConsPair after RelaxFlat_ZX_ZY", Post, LX, LY);
#endif
			}
		else if (ZBeforeY)
			{
			// X < Z < Y
			uint PairIndexXZ = GetPairIndex(SeqIndexX, SeqIndexZ);
			uint PairIndexZY = GetPairIndex(SeqIndexZ, SeqIndexY);
			const MySparseMx &XZ = GetSparsePost(PairIndexXZ);
			const MySparseMx &ZY = GetSparsePost(PairIndexZY);
			RelaxFlat_XZ_ZY(XZ, ZY, Post);
#if 0//TRACE
			LogFlatMx("ConsPair after RelaxFlat_XZ_ZY", Post, LX, LY);
#endif
			}
		else
			{
			// Z is after both X and Y
			uint PairIndexXZ = GetPairIndex(SeqIndexX, SeqIndexZ);
			uint PairIndexYZ = GetPairIndex(SeqIndexY, SeqIndexZ);
			const MySparseMx &XZ = GetSparsePost(PairIndexXZ);
			const MySparseMx &YZ = GetSparsePost(PairIndexYZ);
			RelaxFlat_XZ_YZ(XZ, YZ, Post);
#if 0//TRACE
			LogFlatMx("ConsPair after RelaxFlat_XZ_YZ", Post, LX, LY);
#endif
			}
		}

	MySparseMx &UpdatedSparsePostXY = GetUpdatedSparsePost(PairIndex);
#if 0//TRACE
	LogFlatMx("Final post before update", Post, LX, LY);
#endif
	UpdatedSparsePostXY.UpdateFromPost(SparsePostXY, Post, SeqCount);
	UpdatedSparsePostXY.m_X = SparsePostXY.m_X;
	UpdatedSparsePostXY.m_Y = SparsePostXY.m_Y;
	myfree(Post);
#if 0//TRACE
	Log("\nBefore:");
	SparsePostXY.LogMe();
	Log("\nUpdated:");
	UpdatedSparsePostXY.LogMe();
#endif
	}
muscle-5.1.0/src/countsort.h 0000664 0000000 0000000 00000001736 14244530626 0016022 0 ustar 00root root 0000000 0000000 #ifndef countsort_h
#define countsort_h
#include "gobuff.h"
// Scratch memory for the counting-sort routines: NVEC work vectors that all
// share one capacity (m_MaxValueCount), plus two growable buffers.
// There is no destructor; the vectors live until Free() is called.
// The constructor leaves m_VecPos untouched.
class CountSortMem
	{
public:
	static const unsigned NVEC = 8;

public:
	unsigned *m_Vecs[NVEC];
	unsigned m_VecPos[NVEC];
	unsigned m_MaxValueCount;
	GoBuff m_Sizes;
	GoBuff m_Offsets;

public:
	CountSortMem()
		{
		m_MaxValueCount = 0;
		memset_zero(m_Vecs, NVEC);
		}

	// Releases every work vector and resets the capacity to zero.
	void Free()
		{
		for (unsigned VecIndex = 0; VecIndex < NVEC; ++VecIndex)
			{
			myfree(m_Vecs[VecIndex]);
			m_Vecs[VecIndex] = 0;
			}
		m_MaxValueCount = 0;
		}

	// Makes sure each work vector can hold ValueCount entries.
	// Growing discards the old vectors (and their contents) and allocates
	// fresh ones; a request within the current capacity does nothing.
	void Alloc(unsigned ValueCount)
		{
		const bool BigEnough = (ValueCount <= m_MaxValueCount);
		if (BigEnough)
			return;

		Free();
		m_MaxValueCount = ValueCount;
		for (unsigned VecIndex = 0; VecIndex < NVEC; ++VecIndex)
			m_Vecs[VecIndex] = myalloc(unsigned, m_MaxValueCount);
		}
	};
unsigned CountSortOrderDesc(const unsigned *Values, unsigned ValueCount,
CountSortMem &Mem, unsigned *Order);
unsigned CountSortSubsetDesc(const unsigned *Values, unsigned ValueCount,
CountSortMem &Mem, const unsigned *Subset, unsigned *Result);
#endif // countsort_h
muscle-5.1.0/src/defaulthmmparams.cpp 0000664 0000000 0000000 00000015173 14244530626 0017647 0 ustar 00root root 0000000 0000000 #include "myutils.h"
#include "hmmparams.h"
// Fills Lines with the built-in HMM parameter text: the nucleotide table
// when Nucleo is true, the amino-acid table otherwise.
void HMMParams::GetDefaultHMMParams(bool Nucleo, vector &Lines)
	{
	if (!Nucleo)
		{
		GetDefaultHMMParams_Amino(Lines);
		return;
		}
	GetDefaultHMMParams_Nucleo(Lines);
	}
// Replaces the contents of Lines with the built-in amino-acid HMM
// parameters, one "name value" string per line:
//   "HMM aa"    header line
//   "T.<name>"  transition parameters (START_M, START_IS, START_IL, M_M,
//               M_IS, M_IL, IS_IS, IS_M, IL_IL, IL_M)
//   "E.<ab>"    emission value for the letter pair a,b; each unordered pair
//               is listed once (210 lines for the 20 letters
//               ACDEFGHIKLMNPQRSTVWY).
// ADD_STR is a local helper macro and is undefined again at the end.
void HMMParams::GetDefaultHMMParams_Amino(vector &Lines)
{
Lines.clear();
#define ADD_STR(s) Lines.push_back(s);
ADD_STR("HMM aa")
ADD_STR("T.START_M 0.6")
ADD_STR("T.START_IS 0.02")
ADD_STR("T.START_IL 0.18")
ADD_STR("T.M_M 0.96")
ADD_STR("T.M_IS 0.012")
ADD_STR("T.M_IL 0.008")
ADD_STR("T.IS_IS 0.35")
ADD_STR("T.IS_M 0.65")
ADD_STR("T.IL_IL 0.90")
ADD_STR("T.IL_M 0.10")
ADD_STR("E.AA 0.023731")
ADD_STR("E.CA 0.0014551")
ADD_STR("E.CC 0.010135")
ADD_STR("E.DA 0.0022355")
ADD_STR("E.DC 0.00036798")
ADD_STR("E.DD 0.019112")
ADD_STR("E.EA 0.0033222")
ADD_STR("E.EC 0.00037956")
ADD_STR("E.ED 0.004968")
ADD_STR("E.EE 0.016766")
ADD_STR("E.FA 0.00165")
ADD_STR("E.FC 0.00052274")
ADD_STR("E.FD 0.00069041")
ADD_STR("E.FE 0.00078814")
ADD_STR("E.FF 0.01661")
ADD_STR("E.GA 0.005979")
ADD_STR("E.GC 0.00071206")
ADD_STR("E.GD 0.0023525")
ADD_STR("E.GE 0.0021486")
ADD_STR("E.GF 0.001152")
ADD_STR("E.GG 0.040629")
ADD_STR("E.HA 0.0011435")
ADD_STR("E.HC 0.00026421")
ADD_STR("E.HD 0.00097077")
ADD_STR("E.HE 0.0013177")
ADD_STR("E.HF 0.00072545")
ADD_STR("E.HG 0.001037")
ADD_STR("E.HH 0.00868")
ADD_STR("E.IA 0.0031885")
ADD_STR("E.IC 0.0009404")
ADD_STR("E.ID 0.0010536")
ADD_STR("E.IE 0.0012421")
ADD_STR("E.IF 0.0027995")
ADD_STR("E.IG 0.0014252")
ADD_STR("E.IH 0.00059716")
ADD_STR("E.II 0.017783")
ADD_STR("E.KA 0.0033169")
ADD_STR("E.KC 0.00046951")
ADD_STR("E.KD 0.0025252")
ADD_STR("E.KE 0.0042842")
ADD_STR("E.KF 0.00087222")
ADD_STR("E.KG 0.0025931")
ADD_STR("E.KH 0.0012138")
ADD_STR("E.KI 0.0015785")
ADD_STR("E.KK 0.016122")
ADD_STR("E.LA 0.0044958")
ADD_STR("E.LC 0.0013849")
ADD_STR("E.LD 0.0016197")
ADD_STR("E.LE 0.0022206")
ADD_STR("E.LF 0.0053337")
ADD_STR("E.LG 0.0021285")
ADD_STR("E.LH 0.0011175")
ADD_STR("E.LI 0.010718")
ADD_STR("E.LK 0.0025963")
ADD_STR("E.LL 0.035839")
ADD_STR("E.MA 0.0014888")
ADD_STR("E.MC 0.00037421")
ADD_STR("E.MD 0.00047808")
ADD_STR("E.ME 0.00076105")
ADD_STR("E.MF 0.0011611")
ADD_STR("E.MG 0.00066504")
ADD_STR("E.MH 0.00042237")
ADD_STR("E.MI 0.002241")
ADD_STR("E.MK 0.0009612")
ADD_STR("E.ML 0.0046194")
ADD_STR("E.MM 0.0040952")
ADD_STR("E.NA 0.0021023")
ADD_STR("E.NC 0.00042479")
ADD_STR("E.ND 0.0035354")
ADD_STR("E.NE 0.0022474")
ADD_STR("E.NF 0.00084658")
ADD_STR("E.NG 0.0028888")
ADD_STR("E.NH 0.001412")
ADD_STR("E.NI 0.0010427")
ADD_STR("E.NK 0.0025731")
ADD_STR("E.NL 0.0016028")
ADD_STR("E.NM 0.00063401")
ADD_STR("E.NN 0.012819")
ADD_STR("E.PA 0.0023062")
ADD_STR("E.PC 0.00034766")
ADD_STR("E.PD 0.0012538")
ADD_STR("E.PE 0.0015155")
ADD_STR("E.PF 0.00060701")
ADD_STR("E.PG 0.001556")
ADD_STR("E.PH 0.00049078")
ADD_STR("E.PI 0.0010377")
ADD_STR("E.PK 0.0015484")
ADD_STR("E.PL 0.0015731")
ADD_STR("E.PM 0.00046718")
ADD_STR("E.PN 0.0010028")
ADD_STR("E.PP 0.018461")
ADD_STR("E.QA 0.002191")
ADD_STR("E.QC 0.00032102")
ADD_STR("E.QD 0.0017678")
ADD_STR("E.QE 0.0034513")
ADD_STR("E.QF 0.00059248")
ADD_STR("E.QG 0.0014243")
ADD_STR("E.QH 0.001139")
ADD_STR("E.QI 0.0010088")
ADD_STR("E.QK 0.0031231")
ADD_STR("E.QL 0.0018055")
ADD_STR("E.QM 0.00075546")
ADD_STR("E.QN 0.0015822")
ADD_STR("E.QP 0.00090111")
ADD_STR("E.QQ 0.007566")
ADD_STR("E.RA 0.002445")
ADD_STR("E.RC 0.00044701")
ADD_STR("E.RD 0.0016166")
ADD_STR("E.RE 0.0026887")
ADD_STR("E.RF 0.00090768")
ADD_STR("E.RG 0.0019486")
ADD_STR("E.RH 0.001321")
ADD_STR("E.RI 0.0013814")
ADD_STR("E.RK 0.0059565")
ADD_STR("E.RL 0.0024681")
ADD_STR("E.RM 0.00076734")
ADD_STR("E.RN 0.0020778")
ADD_STR("E.RP 0.0010627")
ADD_STR("E.RQ 0.0025353")
ADD_STR("E.RR 0.017751")
ADD_STR("E.SA 0.0063175")
ADD_STR("E.SC 0.00094867")
ADD_STR("E.SD 0.0028523")
ADD_STR("E.SE 0.002939")
ADD_STR("E.SF 0.0011904")
ADD_STR("E.SG 0.0038196")
ADD_STR("E.SH 0.0011642")
ADD_STR("E.SI 0.0017357")
ADD_STR("E.SK 0.0031263")
ADD_STR("E.SL 0.0025096")
ADD_STR("E.SM 0.00087787")
ADD_STR("E.SN 0.003014")
ADD_STR("E.SP 0.0018004")
ADD_STR("E.SQ 0.0019115")
ADD_STR("E.SR 0.0022454")
ADD_STR("E.SS 0.013466")
ADD_STR("E.TA 0.0039")
ADD_STR("E.TC 0.00073798")
ADD_STR("E.TD 0.0018049")
ADD_STR("E.TE 0.0021676")
ADD_STR("E.TF 0.0010759")
ADD_STR("E.TG 0.0021484")
ADD_STR("E.TH 0.00077747")
ADD_STR("E.TI 0.0024897")
ADD_STR("E.TK 0.0025086")
ADD_STR("E.TL 0.0030227")
ADD_STR("E.TM 0.00093371")
ADD_STR("E.TN 0.0022014")
ADD_STR("E.TP 0.0014798")
ADD_STR("E.TQ 0.0015453")
ADD_STR("E.TR 0.0018605")
ADD_STR("E.TS 0.0048729")
ADD_STR("E.TT 0.012994")
ADD_STR("E.VA 0.0053324")
ADD_STR("E.VC 0.0011915")
ADD_STR("E.VD 0.0012792")
ADD_STR("E.VE 0.001787")
ADD_STR("E.VF 0.0025616")
ADD_STR("E.VG 0.0019458")
ADD_STR("E.VH 0.00071553")
ADD_STR("E.VI 0.01118")
ADD_STR("E.VK 0.002109")
ADD_STR("E.VL 0.0091446")
ADD_STR("E.VM 0.0019746")
ADD_STR("E.VN 0.0013661")
ADD_STR("E.VP 0.0013578")
ADD_STR("E.VQ 0.0013284")
ADD_STR("E.VR 0.0016936")
ADD_STR("E.VS 0.002416")
ADD_STR("E.VT 0.0034345")
ADD_STR("E.VV 0.020752")
ADD_STR("E.WA 0.00039119")
ADD_STR("E.WC 0.00010666")
ADD_STR("E.WD 0.00016015")
ADD_STR("E.WE 0.00023815")
ADD_STR("E.WF 0.00085751")
ADD_STR("E.WG 0.00038786")
ADD_STR("E.WH 0.00019097")
ADD_STR("E.WI 0.00039549")
ADD_STR("E.WK 0.00028448")
ADD_STR("E.WL 0.00076736")
ADD_STR("E.WM 0.00016253")
ADD_STR("E.WN 0.00021006")
ADD_STR("E.WP 0.00015674")
ADD_STR("E.WQ 0.00020592")
ADD_STR("E.WR 0.00029139")
ADD_STR("E.WS 0.00026525")
ADD_STR("E.WT 0.00024961")
ADD_STR("E.WV 0.00038538")
ADD_STR("E.WW 0.0056363")
ADD_STR("E.YA 0.0013184")
ADD_STR("E.YC 0.00036626")
ADD_STR("E.YD 0.00066005")
ADD_STR("E.YE 0.00092548")
ADD_STR("E.YF 0.0036874")
ADD_STR("E.YG 0.00089301")
ADD_STR("E.YH 0.0013104")
ADD_STR("E.YI 0.0012786")
ADD_STR("E.YK 0.0010082")
ADD_STR("E.YL 0.0021971")
ADD_STR("E.YM 0.00054105")
ADD_STR("E.YN 0.0007496")
ADD_STR("E.YP 0.00047608")
ADD_STR("E.YQ 0.00070192")
ADD_STR("E.YR 0.0009943")
ADD_STR("E.YS 0.0010265")
ADD_STR("E.YT 0.00094759")
ADD_STR("E.YV 0.00148")
ADD_STR("E.YW 0.00069226")
ADD_STR("E.YY 0.0099931")
#undef ADD_STR
}
// Replaces the contents of Lines with the built-in nucleotide HMM
// parameters, one "name value" string per line:
//   "HMM nt"    header line
//   "T.<name>"  transition parameters (same names and values as the
//               amino-acid table)
//   "E.<ab>"    emission value for the letter pair a,b over A, C, G, T.
//               Each unordered pair is listed once; identical letters get
//               Diag, different letters get Other.
// ADD_STR, Diag and Other are local helper macros and are all undefined
// again before returning to file scope.
void HMMParams::GetDefaultHMMParams_Nucleo(vector &Lines)
	{
	Lines.clear();
#define ADD_STR(s) Lines.push_back(s);
	ADD_STR("HMM nt")
	ADD_STR("T.START_M 0.6")
	ADD_STR("T.START_IS 0.02")
	ADD_STR("T.START_IL 0.18")
	ADD_STR("T.M_M 0.96")
	ADD_STR("T.M_IS 0.012")
	ADD_STR("T.M_IL 0.008")
	ADD_STR("T.IS_IS 0.35")
	ADD_STR("T.IS_M 0.65")
	ADD_STR("T.IL_IL 0.90")
	ADD_STR("T.IL_M 0.10")
#define Diag "0.12"
#define Other "0.044"
	ADD_STR("E.AA " Diag)
	ADD_STR("E.CA " Other)
	ADD_STR("E.CC " Diag)
	ADD_STR("E.GA " Other)
	ADD_STR("E.GC " Other)
	ADD_STR("E.GG " Diag)
	ADD_STR("E.TA " Other)
	ADD_STR("E.TC " Other)
	ADD_STR("E.TG " Other)
	ADD_STR("E.TT " Diag)
	// Short, common identifiers: do not let them leak past this function.
#undef Other
#undef Diag
#undef ADD_STR
	}
muscle-5.1.0/src/derep.cpp 0000664 0000000 0000000 00000013054 14244530626 0015410 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "derep.h"
// Empties the hash table and all three index tables.
// m_InputSeqs and m_SlotCount are left as they are.
void Derep::Clear()
	{
	m_HashToSeqIndexes.clear();
	m_RepSeqIndexes.clear();
	m_RepSeqIndexToSeqIndexes.clear();
	m_SeqIndexToRepSeqIndex.clear();
	}
// FNV64 hash
// Hashes the lower-cased letters of Seq (multiply by the 64-bit FNV prime,
// then xor in the byte) and reduces the result modulo m_SlotCount, so the
// return value is a valid slot index. Lower-casing makes the hash agree with
// the case-insensitive comparison in SeqsEq().
// m_SlotCount must be non-zero; Run() sets it before any sequence is hashed.
uint Derep::CalcHash(const Sequence *Seq) const
	{
	uint64 hash = 0xcbf29ce484222325uL;
	const uint L = Seq->GetLength();
	for (uint i = 0; i < L; ++i)
		{
		char c = Seq->GetChar(i);
		// <ctype.h> functions require a value representable as unsigned
		// char; passing a negative plain char is undefined.
		byte b = (byte) tolower((unsigned char) c);
		hash *= 1099511628211uL;
		hash ^= (uint64) b;
		}
	uint h = uint(hash%m_SlotCount);
	return h;
	}
// Dereplicates InputSeqs.
// Sequences are visited in input order. The first occurrence of each distinct
// sequence becomes the representative of a new cluster; every later duplicate
// is attached to that representative. Afterwards:
//   m_RepSeqIndexes            representatives, in order of first appearance
//   m_SeqIndexToRepSeqIndex    representative of every input sequence
//                              (a representative maps to itself)
//   m_RepSeqIndexToSeqIndexes  members of each cluster, indexed by the
//                              representative's input index, representative
//                              first
// A pointer to InputSeqs is kept in m_InputSeqs.
void Derep::Run(MultiSequence &InputSeqs)
	{
	Clear();
	m_InputSeqs = &InputSeqs;

	const uint InputSeqCount = InputSeqs.GetSeqCount();

	// Roughly three slots per sequence; never zero.
	m_SlotCount = 3*InputSeqCount + 7;
	m_HashToSeqIndexes.resize(m_SlotCount);
	m_SeqIndexToRepSeqIndex.resize(InputSeqCount, UINT_MAX);
	m_RepSeqIndexToSeqIndexes.resize(InputSeqCount);

	uint UniqueCount = 0;
	uint DupeCount = 0;
	for (uint SeqIndex = 0; SeqIndex < InputSeqCount; ++SeqIndex)
		{
		ProgressStep(SeqIndex, InputSeqCount, "Derep %u uniques, %u dupes",
		  UniqueCount, DupeCount);

		const uint RepSeqIndex = Search(SeqIndex);
		if (RepSeqIndex != UINT_MAX)
			{
			// Duplicate of an earlier sequence.
			m_RepSeqIndexToSeqIndexes[RepSeqIndex].push_back(SeqIndex);
			m_SeqIndexToRepSeqIndex[SeqIndex] = RepSeqIndex;
			++DupeCount;
			continue;
			}

		// First occurrence: it represents itself.
		AddToHash(SeqIndex);
		asserta(SIZE(m_RepSeqIndexes) == UniqueCount);
		m_RepSeqIndexes.push_back(SeqIndex);
		m_RepSeqIndexToSeqIndexes[SeqIndex].push_back(SeqIndex);
		m_SeqIndexToRepSeqIndex[SeqIndex] = SeqIndex;
		++UniqueCount;
		}
	}
// Returns true when the two input sequences have the same length and the
// same letters, ignoring case.
bool Derep::SeqsEq(uint SeqIndex1, uint SeqIndex2) const
	{
	const Sequence *Seq1 = m_InputSeqs->GetSequence(SeqIndex1);
	const Sequence *Seq2 = m_InputSeqs->GetSequence(SeqIndex2);
	const uint L = Seq1->GetLength();
	const uint L2 = Seq2->GetLength();
	if (L2 != L)
		return false;
	for (uint i = 0; i < L; ++i)
		{
		char c1 = Seq1->GetChar(i);
		char c2 = Seq2->GetChar(i);
		// <ctype.h> functions require a value representable as unsigned
		// char; passing a negative plain char is undefined.
		if (toupper((unsigned char) c1) != toupper((unsigned char) c2))
			return false;
		}
	return true;
	}
// Looks for an already-hashed sequence equal to input sequence SeqIndex.
// Only the hash slot of SeqIndex is scanned, in insertion order. Returns the
// input index of the first equal sequence, or UINT_MAX if there is none.
uint Derep::Search(uint SeqIndex) const
	{
	const Sequence *Seq = m_InputSeqs->GetSequence(SeqIndex);
	asserta(Seq != 0);

	const uint h = CalcHash(Seq);
	asserta(h < SIZE(m_HashToSeqIndexes));

	const uint CandidateCount = SIZE(m_HashToSeqIndexes[h]);
	for (uint k = 0; k < CandidateCount; ++k)
		{
		const uint Candidate = m_HashToSeqIndexes[h][k];
		if (SeqsEq(SeqIndex, Candidate))
			return Candidate;
		}
	return UINT_MAX;
	}
// Appends input sequence SeqIndex to the hash slot selected by its hash.
// No duplicate check is made here; callers run Search() first.
void Derep::AddToHash(uint SeqIndex)
	{
	const Sequence *Seq = m_InputSeqs->GetSequence(SeqIndex);
	asserta(Seq != 0);

	const uint h = CalcHash(Seq);
	asserta(h < SIZE(m_HashToSeqIndexes));
	m_HashToSeqIndexes[h].push_back(SeqIndex);
	}
// Appends every representative sequence to UniqueSeqs, in the order the
// representatives were found. UniqueSeqs must be empty on entry. Each
// sequence is added with AddSequence(Seq, false), then AssertSameLabels()
// is run on the result.
void Derep::GetUniqueSeqs(MultiSequence &UniqueSeqs)
	{
	asserta(UniqueSeqs.GetSeqCount() == 0);

	const uint RepCount = SIZE(m_RepSeqIndexes);
	for (uint RepIndex = 0; RepIndex < RepCount; ++RepIndex)
		{
		const uint InputIndex = m_RepSeqIndexes[RepIndex];
		UniqueSeqs.AddSequence(m_InputSeqs->GetSequence(InputIndex), false);
		}

	AssertSameLabels(UniqueSeqs);
	}
// Consistency check of the dereplication result; asserts on any violation.
// Verifies that:
//  - both per-sequence tables have one entry per input sequence,
//  - the number of distinct representatives referenced by
//    m_SeqIndexToRepSeqIndex equals the number of recorded representatives,
//  - every recorded representative has at least one member, and each member
//    maps back to that representative.
void Derep::Validate() const
{
asserta(m_InputSeqs != 0);
const uint InputSeqCount = m_InputSeqs->GetSeqCount();
asserta(SIZE(m_SeqIndexToRepSeqIndex) == InputSeqCount);
asserta(SIZE(m_RepSeqIndexToSeqIndexes) == InputSeqCount);
// NOTE(review): ClusterCount is not used below.
const uint ClusterCount = SIZE(m_RepSeqIndexes);
// Collect the distinct representative indexes referenced by any sequence.
set RepSeqIndexSet;
for (uint SeqIndex = 0; SeqIndex < InputSeqCount; ++SeqIndex)
{
uint RepSeqIndex = m_SeqIndexToRepSeqIndex[SeqIndex];
RepSeqIndexSet.insert(RepSeqIndex);
}
const uint RepSeqIndexCount = SIZE(m_RepSeqIndexes);
asserta(SIZE(RepSeqIndexSet) == RepSeqIndexCount);
for (uint i = 0; i < RepSeqIndexCount; ++i)
{
uint RepSeqIndex = m_RepSeqIndexes[i];
asserta(RepSeqIndexSet.find(RepSeqIndex) != RepSeqIndexSet.end());
const vector &MemberSeqIndexes =
m_RepSeqIndexToSeqIndexes[RepSeqIndex];
const uint MemberCount = SIZE(MemberSeqIndexes);
asserta(MemberCount > 0);
// Every member of the cluster must point back at this representative.
for (uint j = 0; j < MemberCount; ++j)
{
uint MemberSeqIndex = MemberSeqIndexes[j];
uint MemberRepSeqIndex = m_SeqIndexToRepSeqIndex[MemberSeqIndex];
asserta(MemberRepSeqIndex == RepSeqIndex);
}
}
}
// Reports every duplicate (non-representative) sequence together with its
// representative, both identified by the value returned by GetGSI().
//   GSIs:                 output, GSI of each duplicate sequence.
//   GlobalRepSeqIndexes:  output, parallel to GSIs, GSI of the duplicate's
//                         representative.
// Both outputs are cleared first. Each GSI is asserted to be smaller than
// GetGlobalMSSeqCount().
void Derep::GetDupeGSIs(vector &GSIs,
vector &GlobalRepSeqIndexes) const
{
GSIs.clear();
GlobalRepSeqIndexes.clear();
const uint InputSeqCount = m_InputSeqs->GetSeqCount();
const uint GlobalMSSeqCount = GetGlobalMSSeqCount();
const uint ClusterCount = SIZE(m_RepSeqIndexes);
for (uint ClusterIndex = 0; ClusterIndex < ClusterCount; ++ClusterIndex)
{
uint RepSeqIndex = m_RepSeqIndexes[ClusterIndex];
asserta(RepSeqIndex < InputSeqCount);
const vector &MemberSeqIndexes =
m_RepSeqIndexToSeqIndexes[RepSeqIndex];
const uint MemberCount = SIZE(MemberSeqIndexes);
const Sequence *Seq = m_InputSeqs->GetSequence(RepSeqIndex);
uint GlobalRepSeqIndex = Seq->GetGSI();
asserta(GlobalRepSeqIndex < GlobalMSSeqCount);
// The representative is always stored as the first member, so the
// duplicates are members 1..MemberCount-1.
asserta(MemberSeqIndexes[0] == RepSeqIndex);
for (uint i = 1; i < MemberCount; ++i)
{
uint MemberSeqIndex = MemberSeqIndexes[i];
// Note: this Seq shadows the representative's Seq declared above.
const Sequence *Seq = m_InputSeqs->GetSequence(MemberSeqIndex);
uint GlobalMemberSeqIndex = Seq->GetGSI();
asserta(GlobalMemberSeqIndex < GlobalMSSeqCount);
GSIs.push_back(GlobalMemberSeqIndex);
GlobalRepSeqIndexes.push_back(GlobalRepSeqIndex);
}
}
}
void cmd_derep()
{
const string &InputFileName = opt(derep);
const string &OutputFileName = opt(output);
MultiSequence InputSeqs;
InputSeqs.FromFASTA(InputFileName);
Derep D;
D.Run(InputSeqs);
D.Validate();
MultiSequence *UniqueSeqs = new MultiSequence;
D.GetUniqueSeqs(*UniqueSeqs);
UniqueSeqs->WriteMFA(OutputFileName);
}
muscle-5.1.0/src/derep.h 0000664 0000000 0000000 00000001200 14244530626 0015043 0 ustar 00root root 0000000 0000000 #pragma once
// Dereplication of a MultiSequence: groups sequences that compare equal
// under SeqsEq() and keeps the first one seen as the cluster representative.
class Derep
{
public:
// Input sequences; set elsewhere (not owned as far as this header shows).
MultiSequence *m_InputSeqs = 0;
// For each input sequence index, the index of its representative.
vector m_SeqIndexToRepSeqIndex;
// Input sequence indexes of the representatives, one per cluster.
vector m_RepSeqIndexes;
// Indexed by representative's input sequence index: members of its cluster
// (the representative itself is stored first).
vector > m_RepSeqIndexToSeqIndexes;
// presumably the number of hash buckets -- TODO confirm against Run().
uint m_SlotCount = 0;
// Hash buckets: for each hash value, the sequence indexes stored there.
vector > m_HashToSeqIndexes;
public:
uint CalcHash(const Sequence *Seq) const;
void Clear();
void Run(MultiSequence &InputSeqs);
// Returns an equal, already-hashed sequence index or UINT_MAX.
uint Search(uint SeqIndex) const;
void GetUniqueSeqs(MultiSequence &UniqueSeqs);
void AddToHash(uint SeqIndex);
// Case-insensitive sequence equality.
bool SeqsEq(uint SeqIndex1, uint SeqIndex2) const;
void Validate() const;
void GetDupeGSIs(vector &GSIs,
vector &GlobalRepSeqIndexes) const;
};
muscle-5.1.0/src/diagbox.cpp 0000664 0000000 0000000 00000005543 14244530626 0015732 0 ustar 00root root 0000000 0000000 #include "myutils.h"
#include "diagbox.h"
#define TEST 0
/***
DiagBox represents a diagonal "rectangle" in the D.P. matrix.
i = 0..LA-1
j = 0..LB-1
d = LA - i + j = 1 .. LA+LB-1
j = d - LA + i
i = LA - d + j
***/
// Computes the end points of diagonal d (d = LA - i + j) inside an LA x LB
// D.P. matrix: (mini, minj) is the cell with the smallest i and j on the
// diagonal, (maxi, maxj) the cell with the largest.
void GetDiagRange(uint LA, uint LB, uint d,
  uint &mini, uint &minj, uint &maxi, uint &maxj)
{
    // The diagonal starts in row 0 when d >= LA, otherwise in column 0.
    const bool StartsInRow0 = (d >= LA);

    mini = StartsInRow0 ? 0 : LA - d;
    // The far end is computed the same way on both sides.
    maxi = min(LA+LB-1-d, LA-1);
    minj = StartsInRow0 ? d - LA : 0;
    maxj = min(LB-1, d-1);
}
// Initialises Box for the diagonal band DiagLo..DiagHi of an LA x LB matrix.
// Requires 1 <= DiagLo <= DiagHi <= LA+LB-1 (asserted).
void GetDiagBox(uint LA, uint LB, uint DiagLo, uint DiagHi, DiagBox &Box)
{
    asserta(DiagLo <= DiagHi);
    asserta(DiagLo >= 1);
    asserta(DiagHi <= LA + LB - 1);

    // End points of the lowest and highest diagonals of the band.
    GetDiagRange(LA, LB, DiagLo,
      Box.dlo_mini, Box.dlo_minj, Box.dlo_maxi, Box.dlo_maxj);
    GetDiagRange(LA, LB, DiagHi,
      Box.dhi_mini, Box.dhi_minj, Box.dhi_maxi, Box.dhi_maxj);

    Box.dhi = DiagHi;
    Box.dlo = DiagLo;
    Box.LB = LB;
    Box.LA = LA;
}
// Finds the lowest and highest diagonal (d = LA - i + j) touched by a match
// column of Path. Path is a NUL-terminated string of 'M' (advance both),
// 'D' (advance i only) and 'I' (advance j only).
// dlo and dhi are both UINT_MAX when Path contains no 'M'.
void GetDiagLoHi(uint LA, uint LB, const char *Path,
  uint &dlo, uint &dhi)
{
    dlo = UINT_MAX;
    dhi = UINT_MAX;

    uint PosA = 0;
    uint PosB = 0;
    for (const char *p = Path; *p != 0; ++p)
    {
        const char Op = *p;
        const bool IsMatch = (Op == 'M');
        if (IsMatch)
        {
            const uint Diag = LA - PosA + PosB;
            if (dlo == UINT_MAX)
            {
                // First match column seen.
                dlo = Diag;
                dhi = Diag;
            }
            else
            {
                if (Diag < dlo)
                    dlo = Diag;
                if (Diag > dhi)
                    dhi = Diag;
            }
        }
        if (IsMatch || Op == 'D')
            ++PosA;
        if (IsMatch || Op == 'I')
            ++PosB;
    }
}
#if TEST
// Builds the band DiagLo..DiagHi for an LA x LB matrix, logs it, then
// checks its invariants.
static void Test2(uint LA, uint LB, uint DiagLo, uint DiagHi)
{
    DiagBox Band;
    GetDiagBox(LA, LB, DiagLo, DiagHi, Band);

    Band.LogMe();
    Band.Validate();
}
// Checks GetDiagRange() for one diagonal: (i, j) is the expected low end
// point and (I, J) the expected high end point. Logs computed and expected
// values before asserting.
static void Test1(uint LA, uint LB, uint d,
  uint i, uint j, uint I, uint J)
{
    uint mini;
    uint minj;
    uint maxi;
    uint maxj;
    GetDiagRange(LA, LB, d, mini, minj, maxi, maxj);

    Log("LA=%u LB=%u d=%u (%u,%u) (%u,%u) expected (%u,%u) (%u,%u)\n",
      LA, LB, d, mini, minj, maxi, maxj, i, j, I, J);

    asserta(mini == i);
    asserta(maxi == I);
    asserta(minj == j);
    asserta(maxj == J);
}
void TestDiagBox()
{
Test2(16, 19, 17, 37);
Test1(5, 3, 1, 4, 0, 4, 0);
Test1(5, 3, 2, 3, 0, 4, 1);
Test1(5, 3, 3, 2, 0, 4, 2);
Test1(5, 3, 4, 1, 0, 3, 2);
Test1(5, 3, 5, 0, 0, 2, 2);
Test1(5, 3, 6, 0, 1, 1, 2);
Test1(5, 3, 7, 0, 2, 0, 2);
Test1(3, 5, 1, 2, 0, 2, 0);
Test1(3, 5, 2, 1, 0, 2, 1);
Test1(3, 5, 3, 0, 0, 2, 2);
Test1(3, 5, 4, 0, 1, 2, 3);
Test1(3, 5, 5, 0, 2, 2, 4);
Test1(3, 5, 6, 0, 3, 1, 4);
Test1(3, 5, 7, 0, 4, 0, 4);
Test1(5, 5, 1, 4, 0, 4, 0);
Test1(5, 5, 2, 3, 0, 4, 1);
Test1(5, 5, 3, 2, 0, 4, 2);
Test1(5, 5, 4, 1, 0, 4, 3);
Test1(5, 5, 5, 0, 0, 4, 4);
Test1(5, 5, 6, 0, 1, 3, 4);
Test1(5, 5, 7, 0, 2, 2, 4);
Test1(5, 5, 8, 0, 3, 1, 4);
Test1(5, 5, 9, 0, 4, 0, 4);
for (uint LA = 2; LA <= 5; ++LA)
for (uint LB = 2; LB <= 5; ++LB)
for (uint dlo = 1; dlo <= LA+LB-1; ++dlo)
for (uint dhi = dlo; dhi <= LA+LB-1; ++dhi)
Test2(LA, LB, dlo, dhi);
Log("\n");
Log("ALL OK\n");
}
#endif // TEST
muscle-5.1.0/src/diagbox.h 0000664 0000000 0000000 00000007167 14244530626 0015403 0 ustar 00root root 0000000 0000000 #pragma once
struct DiagBox;
// Fills Box for the band of diagonals DiagLo..DiagHi of an LA x LB matrix.
void GetDiagBox(uint LA, uint LB, uint DiagLo, uint DiagHi, DiagBox &Box);
// Returns the end points (mini,minj) and (maxi,maxj) of diagonal d,
// where d = LA - i + j.
void GetDiagRange(uint LA, uint LB, uint d,
uint &mini, uint &minj, uint &maxi, uint &maxj);
// Returns the lowest and highest diagonal touched by an 'M' column of Path;
// both are UINT_MAX if Path has no 'M'.
void GetDiagLoHi(uint LA, uint LB, const char *Path,
uint &dlo, uint &dhi);
// A band of diagonals dlo..dhi in the D.P. matrix of two sequences A
// (length LA) and B (length LB). The diagonal of cell (i, j) is
// d = LA - i + j, so d ranges over 1 .. LA+LB-1.
struct DiagBox
{
    // Leaves all fields uninitialised; call Init() or GetDiagBox() before use.
    DiagBox()
    {
    }

    DiagBox(uint LA_, uint LB_, uint DiagLo, uint DiagHi)
    {
        //GetDiagBox(LA, LB, DiagLo, DiagHi, *this);
        //Validate();
        Init(LA_, LB_, DiagLo, DiagHi);
    }

    // Sets up the band DiagLo..DiagHi and asserts its invariants.
    void Init(uint LA_, uint LB_, uint DiagLo, uint DiagHi)
    {
        GetDiagBox(LA_, LB_, DiagLo, DiagHi, *this);
        Validate();
    }

    uint LA;
    uint LB;

    // Lowest and highest diagonal of the band.
    uint dlo;
    uint dhi;

    // End points of diagonal dlo.
    uint dlo_mini;
    uint dlo_minj;
    uint dlo_maxi;
    uint dlo_maxj;

    // End points of diagonal dhi.
    uint dhi_mini;
    uint dhi_minj;
    uint dhi_maxi;
    uint dhi_maxj;

    uint GetDiag(uint i, uint j) const
    {
        return LA - i + j;
    }

    // i, j are positions 0..LA-1, 0..LB-1.
    bool InBox(uint i, uint j) const
    {
        uint d = GetDiag(i, j);
        return d >= dlo && d <= dhi;
    }

    /***
    i, j are 0-based prefix lengths 0..LA, 0..LB.

    A full path is in the box iff all match pairs are in the box.

    A partial path (that aligns a prefix of A to a prefix of B as
    in D.P.) is in the box iff it is the prefix of at least
    one full path that is in the box.

    A D.P. matrix entry X[i][j] is in the box iff there is at
    least one full path aligning the first i letters of A and
    the first j letters of B ending in a column of type X, i.e.
    if there exists a partial path in the box that ends in X.

    Assume terminals appear in all paths, and DI/ID forbidden.

    Intuitively seems that by these definitions D is in box iff
    DM or MD is in box, I is in box iff IM or MI is in box.
    Don't have proof..
    ***/
    bool InBoxDPM(uint i, uint j) const
    {
        // Special case for M[0][0]
        if (i == 0 && j == 0)
            return true;
        if (i == 0 || j == 0)
            return false;
        uint d = GetDiag(i-1, j-1);
        return d >= dlo && d <= dhi;
    }

    bool InBoxDPD(uint i, uint j) const
    {
        bool MD = i == 0 ? false : InBoxDPM(i-1, j);
        bool DM = (i == LA || j == LB) ? false : InBoxDPM(i+1, j+1);
        return MD || DM;
    }

    bool InBoxDPI(uint i, uint j) const
    {
        bool MI = j == 0 ? false : InBoxDPM(i, j-1);
        bool IM = (i == LA || j == LB) ? false : InBoxDPM(i+1, j+1);
        return MI || IM;
    }

    // d = LA - i + j = 1 .. LA+LB-1
    // Asserts that the band lies inside the matrix, that the stored end
    // points sit on their diagonals, and that dlo's end points are not
    // above/right of dhi's.
    void Validate() const
    {
        asserta(dlo <= dhi);
        asserta(dlo >= GetDiag(LA-1, 0));
        asserta(dhi <= GetDiag(0, LB-1));

        asserta(GetDiag(dlo_mini, dlo_minj) == dlo);
        asserta(GetDiag(dlo_maxi, dlo_maxj) == dlo);
        asserta(GetDiag(dhi_mini, dhi_minj) == dhi);
        asserta(GetDiag(dhi_maxi, dhi_maxj) == dhi);

        asserta(dlo_mini >= dhi_mini);
        asserta(dlo_minj <= dhi_minj);
        asserta(dlo_maxi >= dhi_maxi);
        asserta(dlo_maxj <= dhi_maxj);
    }

    // Bounding rectangle of the band: i in [GetMini(), GetMaxi()],
    // j in [GetMinj(), GetMaxj()].
    uint GetMini() const
    {
        return dhi_mini;
    }

    uint GetMaxi() const
    {
        return dlo_maxi;
    }

    uint GetMinj() const
    {
        return dlo_minj;
    }

    uint GetMaxj() const
    {
        return dhi_maxj;
    }

    /***
    i = 0..LA-1
    j = 0..LB-1
    d = LA - i + j = 1 .. LA+LB-1
    j = d - LA + i
    i = LA - d + j
    ***/
    // For row i, computes Startj from dlo and Endj from dhi+1, each clamped
    // to 0..LB-1. The guards avoid unsigned underflow when d + i < LA.
    void GetRange_j(uint i, uint &Startj, uint &Endj) const
    {
        // j = d - LA + i
        if (dlo + i >= LA)
            Startj = dlo + i - LA;
        else
            Startj = 0;
        if (Startj >= LB)
            Startj = LB - 1;

        if (dhi + i + 1 >= LA)
            Endj = dhi + i + 1 - LA;
        else
            Endj = 0;
        if (Endj >= LB)
            Endj = LB - 1;

        asserta(Endj >= Startj);
        asserta(Startj < LB);
    }

    // Writes all fields and the bounding rectangle to the log.
    void LogMe() const
    {
        // LB is unsigned, so it is printed with %u like every other field.
        Log("LA=%u LB=%u dlo(%u): (%u,%u)-(%u,%u) dhi(%u): (%u,%u)-(%u,%u) i=[%u-%u] j=[%u-%u]\n",
          LA, LB,
          dlo,
          dlo_mini, dlo_minj,
          dlo_maxi, dlo_maxj,
          dhi,
          dhi_mini, dhi_minj,
          dhi_maxi, dhi_maxj,
          GetMini(), GetMaxi(),
          GetMinj(), GetMaxj());
    }
};
muscle-5.1.0/src/disperse.cpp 0000664 0000000 0000000 00000000630 14244530626 0016123 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "ensemble.h"
// Command handler: loads the ensemble named by the disperse option and
// reports its two dispersion values (D_LP, D_Cols). Columns are filtered
// with max_gap_fract, which defaults to 0.5 when the option is not set.
void cmd_disperse()
{
    const string FileName = opt(disperse);

    Ensemble E;
    E.FromFile(FileName);

    const double MaxGapFract =
      optset_max_gap_fract ? opt(max_gap_fract) : 0.5;

    double D_LP;
    double D_Cols;
    E.GetDispersion(MaxGapFract, D_LP, D_Cols);

    ProgressLog("@disperse file=%s D_LP=%.4g D_Cols=%.4g\n",
      FileName.c_str(), D_LP, D_Cols);
}
muscle-5.1.0/src/dividetree.cpp 0000664 0000000 0000000 00000004146 14244530626 0016437 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "tree.h"
// Defined elsewhere. Called by DivideTree() below with a list of leaf nodes
// of InputTree and their labels; SubsetTree is the output.
void MakeSubsetNodes(const Tree &InputTree,
const vector &SubsetNodes,
const vector &SubsetLabels,
Tree &SubsetTree);
// Splits the leaves of a rooted tree into two trees at a non-root node.
//   InputTree: rooted tree to divide (asserted).
//   Node:      node index, must be valid and not the root (asserted).
//   Subtree:   output, built from the leaves under Node.
//   Supertree: output, built from all remaining leaves.
// Both leaf sets must be non-empty and together cover every leaf (asserted).
void DivideTree(const Tree &InputTree, uint Node,
Tree &Subtree, Tree &Supertree)
{
asserta(InputTree.IsRooted());
const uint InputNodeCount = InputTree.GetNodeCount();
const uint InputLeafCount = InputTree.GetLeafCount();
asserta(Node < InputNodeCount);
asserta(!InputTree.IsRoot(Node));
vector SubtreeLeafNodes;
InputTree.GetSubtreeLeafNodes(Node, SubtreeLeafNodes);
uint N = SIZE(SubtreeLeafNodes);
asserta(N > 0);
// Record the subtree leaves both as a set (for membership tests below)
// and as a label list parallel to SubtreeLeafNodes.
set SubtreeSet;
vector SubtreeLabels;
for (uint i = 0; i < N; ++i)
{
uint Node2 = SubtreeLeafNodes[i];
string Label;
InputTree.GetLabel(Node2, Label);
SubtreeSet.insert(Node2);
SubtreeLabels.push_back(Label);
}
// Every leaf that is not in the subtree goes to the supertree.
vector SupertreeLeafNodes;
vector SupertreeLabels;
for (uint Node2 = 0; Node2 < InputNodeCount; ++Node2)
{
if (!InputTree.IsLeaf(Node2))
continue;
if (SubtreeSet.find(Node2) == SubtreeSet.end())
{
string Label;
InputTree.GetLabel(Node2, Label);
SupertreeLeafNodes.push_back(Node2);
SupertreeLabels.push_back(Label);
}
}
const uint SubtreeLeafCount = SIZE(SubtreeLeafNodes);
const uint SupertreeLeafCount = SIZE(SupertreeLeafNodes);
asserta(SubtreeLeafCount > 0);
asserta(SupertreeLeafCount > 0);
asserta(SubtreeLeafCount + SupertreeLeafCount == InputLeafCount);
MakeSubsetNodes(InputTree, SubtreeLeafNodes, SubtreeLabels,
Subtree);
MakeSubsetNodes(InputTree, SupertreeLeafNodes, SupertreeLabels,
Supertree);
}
void cmd_divide_tree()
{
const string &InputFileName = opt(divide_tree);
Tree InputTree;
InputTree.FromFile(InputFileName);
const string &Label1 = opt(label1);
const string &Label2 = opt(label2);
uint Node1 = InputTree.GetNodeIndex(Label1);
uint Node2 = InputTree.GetNodeIndex(Label2);
uint DivideNode = InputTree.GetLCA(Node1, Node2);
Tree Subtree;
Tree Supertree;
DivideTree(InputTree, DivideNode, Subtree, Supertree);
Subtree.ToFile(opt(subtreeout));
Supertree.ToFile(opt(supertreeout));
}
muscle-5.1.0/src/eacluster.cpp 0000664 0000000 0000000 00000014656 14244530626 0016311 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "eacluster.h"
#include "locallock.h"
void MakeReplicateFileName_N(const string &Pattern, uint N, string &FileName)
{
FileName.clear();
bool Found = false;
for (uint i = 0; i < SIZE(Pattern); ++i)
{
char c = Pattern[i];
if (c == '@')
{
string s;
Ps(s, "%u", N);
FileName += s;
Found = true;
}
else
FileName += c;
}
if (!Found)
{
string s;
Ps(s, "%u", N);
FileName += s;
}
}
// Empties all clustering results. m_InputSeqs, m_US and m_MinEA are left
// untouched.
void EACluster::Clear()
{
    // Output alignments.
    m_ClusterMFAs.clear();

    // Sequence <-> centroid bookkeeping.
    m_SeqIndexToCentroidIndex.clear();
    m_CentroidIndexToSeqIndexes.clear();
    m_CentroidSeqIndexes.clear();
}
// Greedy clustering of InputSeqs in input order.
//   InputSeqs: sequences to cluster; must be non-empty. A pointer to it is
//              kept in m_InputSeqs.
//   MinEA:     threshold passed to GetBestCentroid(); a sequence joins the
//              best centroid found, or founds a new cluster when
//              GetBestCentroid() returns UINT_MAX.
// On return the centroid/member tables are filled and one MultiSequence per
// cluster has been built by MakeClusterMFAs().
void EACluster::Run(MultiSequence &InputSeqs, float MinEA)
{
    AssertSameLabels(InputSeqs);
    Clear();
    m_US.Init();
    m_InputSeqs = &InputSeqs;

    const uint InputSeqCount = InputSeqs.GetSeqCount();
    asserta(InputSeqCount > 0);

    // Clear() has already emptied the table; UINT_MAX marks "not assigned".
    m_SeqIndexToCentroidIndex.resize(InputSeqCount, UINT_MAX);

    const float MinEE = (1 - MinEA);
    uint ClusterCount = 0;
    uint MemberCount = 0;
    for (uint SeqIndex = 0; SeqIndex < InputSeqCount; ++SeqIndex)
    {
        ProgressStep(SeqIndex, InputSeqCount,
          "UCLUST %u seqs EE<%.2f, %u centroids, %u members",
          InputSeqCount, MinEE, ClusterCount, MemberCount);

        float BestEA;
        const uint CentroidIndex = GetBestCentroid(SeqIndex, MinEA, BestEA);
        if (CentroidIndex == UINT_MAX)
        {
            // No acceptable centroid: this sequence founds a new cluster
            // and becomes searchable for the sequences that follow.
            const uint ClusterIndex = ClusterCount;
            ++ClusterCount;
            m_SeqIndexToCentroidIndex[SeqIndex] = ClusterIndex;
            m_CentroidSeqIndexes.push_back(SeqIndex);
            m_CentroidIndexToSeqIndexes.emplace_back();
            m_CentroidIndexToSeqIndexes.back().push_back(SeqIndex);

            uint L;
            const byte *ByteSeq = m_InputSeqs->GetByteSeq(SeqIndex, L);
            m_US.AddSeq(ByteSeq, L, SeqIndex);
        }
        else
        {
            ++MemberCount;
            asserta(CentroidIndex < SIZE(m_CentroidIndexToSeqIndexes));
            asserta(CentroidIndex < SIZE(m_CentroidSeqIndexes));
            m_SeqIndexToCentroidIndex[SeqIndex] = CentroidIndex;
            m_CentroidIndexToSeqIndexes[CentroidIndex].push_back(SeqIndex);
        }

        // Full consistency check after every sequence; its cost grows with
        // the number of sequences processed so far.
        Validate();
    }
    MakeClusterMFAs();
}
// Finds the existing centroid that aligns best to sequence SeqIndex.
//   MinEA:  an alignment only counts if its EA is strictly greater than this.
//   BestEA: output, highest EA seen among accepted candidates (0 if none).
//           NOTE: not assigned on the two early returns below.
// Returns the centroid index, or UINT_MAX if there are no centroids, the
// search returns no candidates, or no candidate exceeds MinEA.
uint EACluster::GetBestCentroid(uint SeqIndex, float MinEA, float &BestEA)
{
uint CentroidCount = SIZE(m_CentroidSeqIndexes);
if (CentroidCount == 0)
return UINT_MAX;
uint L;
const byte *ByteSeq = m_InputSeqs->GetByteSeq(SeqIndex, L);
// Candidate sequence indexes (and their word counts) from m_US.
vector TopSeqIndexes;
vector TopWordCounts;
m_US.SearchSeq(ByteSeq, L, TopSeqIndexes, TopWordCounts);
const uint TopCount = SIZE(TopSeqIndexes);
asserta(SIZE(TopWordCounts) == TopCount);
if (TopCount == 0)
return UINT_MAX;
uint ThreadCount = GetRequestedThreadCount();
BestEA = 0;
uint BestCentroidIndex = UINT_MAX;
bool Done = false;
// Candidates are aligned in parallel. Updates to BestEA, BestCentroidIndex
// and Done happen between Lock() and Unlock().
#pragma omp parallel for num_threads(ThreadCount)
for (int TopIndex = 0; TopIndex < (int) TopCount; ++TopIndex)
{
// NOTE(review): Done is read here without holding the lock. Once it is
// set, remaining iterations skip the alignment.
if (Done)
continue;
uint TopSeqIndex = TopSeqIndexes[TopIndex];
float EA = AlignSeqPair(SeqIndex, TopSeqIndex);
Lock();
if (EA > MinEA && EA > BestEA)
{
BestEA = EA;
asserta(TopSeqIndex < SIZE(m_SeqIndexToCentroidIndex));
uint CentroidIndex = m_SeqIndexToCentroidIndex[TopSeqIndex];
asserta(CentroidIndex < CentroidCount);
BestCentroidIndex = CentroidIndex;
}
// Early-termination heuristics: stop once a hit is very good (> 0.9) or
// the current candidate is far (> 0.3) below the best hit...
if (BestEA >= MinEA)
{
if (BestEA > 0.9)
Done = true;
if (BestEA - EA > 0.3)
Done = true;
}
// ...or give up when nothing is within 0.3 of MinEA after candidate 20.
if (BestEA < MinEA - 0.3 && TopIndex > 20)
Done = true;
Unlock();
}
return BestCentroidIndex;
}
// Copies the per-cluster MultiSequence pointers into MFAs (cleared first).
// Only pointers are copied; the objects remain those held in m_ClusterMFAs.
void EACluster::GetClusterMFAs(vector &MFAs) const
{
const uint N = SIZE(m_ClusterMFAs);
MFAs.clear();
for (uint i = 0; i < N; ++i)
{
MultiSequence *ClusterMFA = m_ClusterMFAs[i];
AssertSameLabels(*ClusterMFA);
MFAs.push_back(ClusterMFA);
}
}
void EACluster::WriteMFAs(const string &FileNamePattern) const
{
const uint CentroidCount = SIZE(m_ClusterMFAs);
for (uint CentroidIndex = 0; CentroidIndex < CentroidCount; ++CentroidIndex)
{
ProgressStep(CentroidIndex, CentroidCount, "Write cluster MFAs");
const MultiSequence *MFA = m_ClusterMFAs[CentroidIndex];
asserta(MFA != 0);
string FileName;
MakeReplicateFileName_N(FileNamePattern, CentroidIndex+1, FileName);
MFA->WriteMFA(FileName);
}
}
// Rebuilds m_ClusterMFAs: one newly allocated MultiSequence per centroid,
// holding that cluster's member sequences (added with the second
// AddSequence() argument set to false).
// NOTE: previously stored pointers are dropped by clear() without delete.
void EACluster::MakeClusterMFAs()
{
const uint CentroidCount = SIZE(m_CentroidSeqIndexes);
m_ClusterMFAs.clear();
for (uint CentroidIndex = 0; CentroidIndex < CentroidCount; ++CentroidIndex)
{
ProgressStep(CentroidIndex, CentroidCount, "Make cluster MFAs");
MultiSequence *ClusterMFA = new MultiSequence;
asserta(ClusterMFA != 0);
const vector &SeqIndexes =
m_CentroidIndexToSeqIndexes[CentroidIndex];
const uint MemberCount = SIZE(SeqIndexes);
for (uint i = 0; i < MemberCount; ++i)
{
uint SeqIndex = SeqIndexes[i];
const Sequence *seq = m_InputSeqs->GetSequence(SeqIndex);
ClusterMFA->AddSequence(seq, false);
}
AssertSameLabels(*ClusterMFA);
m_ClusterMFAs.push_back(ClusterMFA);
}
// Cross-check of the clusters against the input sequences.
AssertSameSeqsVec(*m_InputSeqs, m_ClusterMFAs);
}
// Aligns two input sequences with AlignPairFlat() and returns its EA value.
// The alignment path itself is discarded.
float EACluster::AlignSeqPair(uint SeqIndex1, uint SeqIndex2)
{
    const Sequence *SeqA = m_InputSeqs->GetSequence(SeqIndex1);
    const Sequence *SeqB = m_InputSeqs->GetSequence(SeqIndex2);

    string UnusedPath;
    return AlignPairFlat(SeqA, SeqB, UnusedPath);
}
// Consistency check of the cluster tables; asserts on any violation.
// Verifies that there is one member list per centroid, that all stored
// sequence indexes are in range, and that every member maps back to the
// centroid whose list it appears in.
void EACluster::Validate() const
{
const uint SeqCount = m_InputSeqs->GetSeqCount();
const uint CentroidCount = SIZE(m_CentroidSeqIndexes);
asserta(SIZE(m_CentroidIndexToSeqIndexes) == CentroidCount);
for (uint CentroidIndex = 0; CentroidIndex < CentroidCount; ++CentroidIndex)
{
uint CentroidSeqIndex = m_CentroidSeqIndexes[CentroidIndex];
asserta(CentroidSeqIndex < SeqCount);
const vector &MemberSeqIndexes = m_CentroidIndexToSeqIndexes[CentroidIndex];
const uint MemberCount = SIZE(MemberSeqIndexes);
for (uint MemberIndex = 0; MemberIndex < MemberCount; ++MemberIndex)
{
uint MemberSeqIndex = MemberSeqIndexes[MemberIndex];
asserta(MemberSeqIndex < SeqCount);
uint CentroidIndex2 = m_SeqIndexToCentroidIndex[MemberSeqIndex];
asserta(CentroidIndex2 == CentroidIndex);
}
}
}
// Command handler: clusters the FASTA file named by the eacluster option
// and writes one alignment file per cluster.
//   minea:  clustering threshold, default 0.9.
//   output: file name pattern; '@' is replaced by the 1-based cluster number.
void cmd_eacluster()
{
    const string &InputFileName = opt(eacluster);
    MultiSequence InputSeqs;
    InputSeqs.FromFASTA(InputFileName);

    const float MinEA = (float) optd(minea, 0.9);

    // MakeReplicateFileName_N() substitutes '@', not '%'. With a '%' default
    // the number was appended after the extension ("cluster%.afa1").
    string OutputFileNamePattern = optd(output, "cluster@.afa");

    InitProbcons();
    EACluster EC;
    EC.Run(InputSeqs, MinEA);
    EC.WriteMFAs(OutputFileNamePattern);
}
muscle-5.1.0/src/eacluster.h 0000664 0000000 0000000 00000001232 14244530626 0015740 0 ustar 00root root 0000000 0000000 #pragma once
#include "usorter.h"
// Greedy clustering of sequences by the EA value of pairwise alignments.
class EACluster
{
public:
// Sequences being clustered; set by Run().
MultiSequence *m_InputSeqs = 0;
// Search index over centroid sequences, used to pick alignment candidates.
USorter m_US;
// NOTE(review): not assigned by any method visible in eacluster.cpp.
float m_MinEA = FLT_MAX;
// Input sequence index of each centroid, by centroid index.
vector m_CentroidSeqIndexes;
// Member sequence indexes of each cluster, by centroid index.
vector > m_CentroidIndexToSeqIndexes;
// Centroid index assigned to each input sequence.
vector m_SeqIndexToCentroidIndex;
// One MultiSequence per cluster, built by MakeClusterMFAs().
vector m_ClusterMFAs;
public:
void Clear();
void Run(MultiSequence &InputSeqs, float MinEA);
void MakeClusterMFAs();
// Returns the best centroid index for SeqIndex, or UINT_MAX if none.
uint GetBestCentroid(uint SeqIndex, float MinEA, float &BestEA);
float AlignSeqPair(uint SeqIndex1, uint SeqIndex2);
void WriteMFAs(const string &FileNamePattern) const;
void GetClusterMFAs(vector &MFAs) const;
void Validate() const;
};
muscle-5.1.0/src/eadistmx.cpp 0000664 0000000 0000000 00000004575 14244530626 0016137 0 ustar 00root root 0000000 0000000 #include "myutils.h"
#include "muscle.h"
#include "locallock.h"
// Defined elsewhere; called by cmd_eadistmx() after the input is loaded.
void ProgressLogInputSummary(const string &FileName, const MultiSequence &Seqs);
// Aligns every pair of sequences and stores the EA value of each pair.
//   f:             if non-null, one "label1<TAB>label2<TAB>EA" line is
//                  written per pair.
//   sequences:     input sequences.
//   DistMx:        output, SeqCount x SeqCount symmetric matrix of EA values
//                  with 1 on the diagonal.
//   SparsePostVec: if non-null, must be empty on entry; receives one newly
//                  allocated MySparseMx per pair. Entries are appended as
//                  pairs finish, so with several threads their order need
//                  not match the pair order.
void CalcEADistMx(FILE *f, MultiSequence* sequences,
  vector > &DistMx, vector *SparsePostVec)
{
    DistMx.clear();
    const uint SeqCount = sequences->GetSeqCount();
    DistMx.resize(SeqCount);
    for (uint i = 0; i < SeqCount; ++i)
    {
        DistMx[i].resize(SeqCount, 0);
        DistMx[i][i] = 1;
    }

    if (SparsePostVec != 0)
        asserta(SIZE(*SparsePostVec) == 0);

    vector SeqIndexes1;
    vector SeqIndexes2;
    GetAllPairs(SeqCount, SeqIndexes1, SeqIndexes2);
    uint PairCount = SIZE(SeqIndexes1);
    // Both index lists must describe the same number of pairs.
    asserta(SIZE(SeqIndexes2) == PairCount);
    uint PairCount2 = (SeqCount * (SeqCount - 1)) / 2;
    asserta(PairCount == PairCount2);

    // all-vs-all pairwise alignments for posterior probability matrices
    unsigned ThreadCount = GetRequestedThreadCount();
    uint PairCounter = 0;
    float SumEA = 0;
#pragma omp parallel for num_threads(ThreadCount)
    for (int PairIndex = 0; PairIndex < (int) PairCount; ++PairIndex)
    {
        uint SeqIndex1 = SeqIndexes1[PairIndex];
        uint SeqIndex2 = SeqIndexes2[PairIndex];

        const Sequence* seq1 = sequences->GetSequence(SeqIndex1);
        const Sequence* seq2 = sequences->GetSequence(SeqIndex2);

        const char *Label1 = seq1->m_Label.c_str();
        const char *Label2 = seq2->m_Label.c_str();

        // Progress counters are shared between threads.
        Lock();
        double MeanEA = (PairCounter == 0 ? 0 : SumEA/PairCounter);
        ProgressStep(PairCounter++, PairCount,
          "%u consensus seqs, mean EE %.2g", SeqCount, 1 - MeanEA);
        Unlock();

        // The alignment itself runs outside the lock.
        string Path;
        float EA;
        MySparseMx *SparsePost = 0;
        if (SparsePostVec == 0)
            EA = AlignPairFlat(seq1, seq2, Path);
        else
        {
            SparsePost = new MySparseMx;
            EA = AlignPairFlat_SparsePost(seq1, seq2, Path, SparsePost);
        }

        Lock();
        // push_back on the shared vector must be serialised; doing it
        // outside the lock is a data race between threads.
        if (SparsePostVec != 0)
            SparsePostVec->push_back(SparsePost);
        DistMx[SeqIndex1][SeqIndex2] = EA;
        DistMx[SeqIndex2][SeqIndex1] = EA;
        if (f != 0)
            fprintf(f, "%s\t%s\t%.4g\n", Label1, Label2, EA);
        SumEA += EA;
        Unlock();
    }
}
// Command handler: loads the sequences named by the eadistmx option and
// writes the EA value of every sequence pair to the output file
// (the output option is required).
void cmd_eadistmx()
{
const string &InputFileName = opt(eadistmx);
asserta(optset_output);
FILE *f = CreateStdioFile(opt(output));
// NOTE: allocated with new and not deleted in this function.
MultiSequence* sequences = new MultiSequence();
assert(sequences);
sequences->LoadMFA(InputFileName, true);
ProgressLogInputSummary(InputFileName, *sequences);
InitProbcons();
vector > DistMx;
// Called with three arguments, so the fourth (SparsePostVec) presumably
// has a default in the declaration -- confirm in the header.
CalcEADistMx(f, sequences, DistMx);
CloseStdioFile(f);
}
muscle-5.1.0/src/eadistmxmsas.cpp 0000664 0000000 0000000 00000001661 14244530626 0017014 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "pprog.h"
// Command handler: reads a list of MSA file names from the file named by
// the eadistmx_msas option, aligns all pairs of MSAs with PProg, and writes
// "label_i<TAB>label_j<TAB>score" for every pair i < j to the output file
// (the output option is required).
void cmd_eadistmx_msas()
{
const string &FileName = opt(eadistmx_msas);
vector MSAFileNames;
ReadStringsFromFile(FileName, MSAFileNames);
const uint MSACount = SIZE(MSAFileNames);
asserta(optset_output);
FILE *f = CreateStdioFile(opt(output));
PProg PP;
if (optset_paircount)
PP.m_TargetPairCount = opt(paircount);
bool IsNucleo;
PP.LoadMSAs(MSAFileNames, IsNucleo);
// The alphabet is selected from what LoadMSAs() reported.
SetAlpha(IsNucleo ? ALPHA_Nucleo : ALPHA_Amino);
InitProbcons();
PP.AlignAllInputPairs();
vector > &ScoreMx = PP.m_ScoreMx;
for (uint i = 0; i < MSACount; ++i)
{
asserta(i < SIZE(ScoreMx));
// NOTE(review): the c_str() pointer is only valid if GetMSALabel()
// returns a reference to a stored string, not a temporary -- confirm.
const char *Labeli = PP.GetMSALabel(i).c_str();
for (uint j = i+1; j < MSACount; ++j)
{
asserta(j < SIZE(ScoreMx[i]));
const char *Labelj = PP.GetMSALabel(j).c_str();
float Score = ScoreMx[i][j];
fprintf(f, "%s\t%s\t%.4f\n", Labeli, Labelj, Score);
}
}
CloseStdioFile(f);
}
muscle-5.1.0/src/eesort.cpp 0000664 0000000 0000000 00000003522 14244530626 0015611 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "sort.h"
#include "locallock.h"
void cmd_eesort()
{
const string &QueryFileName = opt(eesort);
const string &DBFileName = opt(db);
const string &OutputFileName = opt(output);
FILE *fTsv = CreateStdioFile(opt(tsvout));
FILE *fFa = CreateStdioFile(OutputFileName);
MultiSequence Query;
MultiSequence DB;
Query.FromFASTA(QueryFileName, true);
Progress("Reading %s ...", DBFileName.c_str());
DB.FromFASTA(DBFileName, true);
Progress("done\n");
bool IsNucleo = DB.GuessIsNucleo();
if (IsNucleo)
SetAlpha(ALPHA_Nucleo);
else
SetAlpha(ALPHA_Amino);
InitProbcons();
const uint QuerySeqCount = Query.GetSeqCount();
const uint DBSeqCount = DB.GetSeqCount();
unsigned ThreadCount = GetRequestedThreadCount();
uint PairCounter = 0;
vector EAs(DBSeqCount, DBL_MAX);
#pragma omp parallel for num_threads(ThreadCount)
for (int iDBSeqIndex = 0; iDBSeqIndex < (int) DBSeqCount; ++iDBSeqIndex)
{
Lock();
ProgressStep(PairCounter++, DBSeqCount, "Calculating");
Unlock();
uint DBSeqIndex = uint(iDBSeqIndex);
const Sequence *DBSeq = DB.GetSequence(DBSeqIndex);
for (uint QuerySeqIndex = 0; QuerySeqIndex < QuerySeqCount; ++QuerySeqIndex)
{
const Sequence *QSeq = Query.GetSequence(QuerySeqIndex);
string Path;
double EA = AlignPairFlat(QSeq, DBSeq, Path);
if (QuerySeqIndex == 0)
{
Lock();
EAs[DBSeqIndex] = EA;
Unlock();
}
}
}
vector Order(DBSeqCount);
QuickSortOrderDesc(EAs.data(), DBSeqCount, Order.data());
for (uint k = 0; k < DBSeqCount; ++k)
{
ProgressStep(k, DBSeqCount, "Writing %s", OutputFileName.c_str());
uint DBSeqIndex = Order[k];
const Sequence *DBSeq = DB.GetSequence(DBSeqIndex);
double EA = EAs[DBSeqIndex];
asserta(EA != DBL_MAX);
Pf(fTsv, "%.3g %s\n", EA, DBSeq->GetLabel().c_str());
DBSeq->WriteMFA(fFa);
}
}
muscle-5.1.0/src/efabestcols.cpp 0000664 0000000 0000000 00000002757 14244530626 0016613 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "ensemble.h"
#include "sort.h"
// Command handler: from the ensemble named by the efa_bestcols option,
// selects the columns with the highest confidence and writes an MSA made of
// those columns to the output file.
//   minconf:        minimum column confidence, default 1.0.
//   max_gap_fract:  maximum gap fraction of a column, default 0.5,
//                   must be in [0, 1].
//   maxcols:        maximum number of columns kept, default unlimited.
void cmd_efa_bestcols()
{
const string EfaFileName = opt(efa_bestcols);
const string OutputFileName = opt(output);
double MinConf = 1.0;
if (optset_minconf)
MinConf = opt(minconf);
double MaxGapFract = 0.5;
if (optset_max_gap_fract)
MaxGapFract = opt(max_gap_fract);
asserta(MaxGapFract >= 0 && MaxGapFract <= 1.0);
uint MaxCols = UINT_MAX;
if (optset_maxcols)
MaxCols = opt(maxcols);
Ensemble E;
E.FromFile(EfaFileName);
// Confs and UniqueIxs are parallel: confidence and unique-column index of
// every column that passes both filters.
vector Confs;
const uint UniqueIxCount = SIZE(E.m_UniqueIxToIxs);
vector UniqueIxs;
for (uint UniqueIx = 0; UniqueIx < UniqueIxCount; ++UniqueIx)
{
uint n = SIZE(Confs);
double Pct = GetPct(n, UniqueIxCount);
ProgressStep(UniqueIx, UniqueIxCount,
"%u cols (%.1f%%) conf >= %.3g, gaps <= %.3g",
n, Pct, MinConf, MaxGapFract);
double Conf = E.GetConf(UniqueIx);
if (Conf < MinConf)
continue;
uint Ix = E.m_UniqueIxs[UniqueIx];
double GapFract = E.GetGapFract(Ix);
if (GapFract > MaxGapFract)
continue;
UniqueIxs.push_back(UniqueIx);
Confs.push_back(Conf);
}
// Rank the surviving columns by descending confidence and keep at most
// MaxCols of them.
const uint M = SIZE(Confs);
vector Order(M);
QuickSortOrderDesc(Confs.data(), M, Order.data());
vector BestUniqueIxs;
const uint N = min(SIZE(Order), MaxCols);
for (uint i = 0; i < N; ++i)
{
uint UniqueIx = UniqueIxs[Order[i]];
BestUniqueIxs.push_back(UniqueIx);
}
MSA RepAln;
E.MakeResampledMSA(BestUniqueIxs, RepAln);
RepAln.ToFASTAFile(OutputFileName);
}
muscle-5.1.0/src/efabestconf.cpp 0000664 0000000 0000000 00000003274 14244530626 0016573 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "ensemble.h"
void cmd_efa_bestconf()
{
const string &FileName = opt(efa_bestconf);
Ensemble E;
E.FromFile(FileName);
const uint SeqCount = E.GetSeqCount();
const uint MSACount = E.GetMSACount();
const uint IxCount = E.GetIxCount();
double AvgCols = double(IxCount)/MSACount;
ProgressLog("%u seqs, %u MSAs, avg cols %.1f\n",
SeqCount, MSACount, AvgCols);
ProgressLog(" MSA Cols N1 N1f TotConf MedConf Name\n");
// 12345 1234567 12345 1234 1234567 1234567
uint BestMSAIndex_Total = 0;
uint BestMSAIndex_Median = 0;
string BestMSAName_Total;
string BestMSAName_Median;
double BestConf_Total = -1;
double BestConf_Median = -1;
for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
{
const MSA &M = *E.m_MSAs[MSAIndex];
const string &Name = E.m_MSANames[MSAIndex];
uint N1 = E.GetN1(MSAIndex);
uint ColCount = M.GetColCount();
double TotalConf = E.GetTotalConf(MSAIndex);
double MedianConf = E.GetMedianConf(MSAIndex);
if (TotalConf > BestConf_Total)
{
BestConf_Total = TotalConf;
BestMSAIndex_Total = MSAIndex;
BestMSAName_Total = Name;
}
if (MedianConf > BestConf_Median)
{
BestConf_Median = MedianConf;
BestMSAIndex_Median = MSAIndex;
BestMSAName_Median = Name;
}
double N1f = double(N1)/ColCount;
ProgressLog("%5u %7u %5u %4.2f %7.3f %7.4f %s\n",
MSAIndex+1, ColCount, N1, N1f, TotalConf, MedianConf, Name.c_str());
}
ProgressLog("Best MSA, total %u (%s)\n",
BestMSAIndex_Total+1, BestMSAName_Total.c_str());
ProgressLog("Best MSA, median %u (%s)\n",
BestMSAIndex_Median+1, BestMSAName_Median.c_str());
E.m_MSAs[BestMSAIndex_Median]->ToFASTAFile(opt(output));
}
muscle-5.1.0/src/efaexplode.cpp 0000664 0000000 0000000 00000001125 14244530626 0016421 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "ensemble.h"
void cmd_efa_explode()
{
const string &InputFileName = opt(efa_explode);
string Prefix;
if (optset_prefix)
Prefix = opt(prefix);
string Suffix;
if (optset_suffix)
Suffix = opt(suffix);
Ensemble E;
E.FromFile(InputFileName);
const uint MSACount = E.GetMSACount();
for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
{
const MSA &M = E.GetMSA(MSAIndex);
string FileName = E.GetMSAName(MSAIndex);
if (FileName == "")
Ps(FileName, "%u", MSAIndex);
FileName = Prefix + FileName + Suffix;
M.ToFASTAFile(FileName);
}
}
muscle-5.1.0/src/efastats.cpp 0000664 0000000 0000000 00000005325 14244530626 0016125 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "ensemble.h"
#include "qscorer.h"
// Scores every MSA of the ensemble against a reference alignment.
//   E:           ensemble to score.
//   RefMSA:      reference alignment.
//   MaxGapFract: copied into QScorer::m_MaxGapFract.
//   Qs, TCs:     outputs (cleared first); one m_Q / m_TC value per MSA,
//                in ensemble order.
static void CmpRef(const Ensemble &E, const MSA &RefMSA,
double MaxGapFract, vector &Qs, vector &TCs)
{
Qs.clear();
TCs.clear();
QScorer QS;
QS.m_MaxGapFract = MaxGapFract;
const uint MSACount = E.GetMSACount();
for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
{
const MSA &TestMSA = E.GetMSA(MSAIndex);
// NOTE(review): TestName is not used below.
const string &TestName = E.GetMSAName(MSAIndex);
QS.Run(TestMSA, RefMSA);
double Q = QS.m_Q;
double TC = QS.m_TC;
Qs.push_back(Q);
TCs.push_back(TC);
}
}
// Command handler: prints per-MSA statistics and ensemble-wide dispersion
// for the ensemble named by the efastats option. When the ref option is
// set, each MSA is also scored against that reference and the median Q and
// TC are reported as E_LP = 1 - Q and E_Cols = 1 - TC.
void cmd_efastats()
{
const string &InputFileName = opt(efastats);
double MaxGapFract = optd(max_gap_fract, 0.5);
// NOTE(review): RefFileName is not used below; opt(ref) is read again.
const string &RefFileName = opt(ref);
Ensemble E;
E.FromFile(InputFileName);
vector Qs;
vector TCs;
if (optset_ref)
{
MSA RefMSA;
RefMSA.FromFASTAFile(opt(ref));
CmpRef(E, RefMSA, MaxGapFract, Qs, TCs);
}
const uint SeqCount = E.GetSeqCount();
const uint MSACount = E.GetMSACount();
const uint IxCount = E.GetIxCount();
double D_LetterPairs;
double D_Columns;
E.GetDispersion(MaxGapFract, D_LetterPairs, D_Columns);
// CC of an MSA = its total confidence divided by its column count.
vector CCs;
double AvgCols = double(IxCount)/MSACount;
ProgressLog(" MSA Cols N1 N1f Conf CC");
// 12345 1234567 12345 1234 1234 12345
if (optset_ref)
ProgressLog(" Q TC");
// 123456 123456
ProgressLog(" Name\n");
for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
{
const MSA &M = *E.m_MSAs[MSAIndex];
const string &Name = E.m_MSANames[MSAIndex];
uint N1 = E.GetN1(MSAIndex);
uint ColCount = M.GetColCount();
double TotalConf = E.GetTotalConf(MSAIndex);
double N1f = double(N1)/ColCount;
double CC = TotalConf/ColCount;
CCs.push_back(CC);
ProgressLog("%5u %7u %5u %4.2f %4.2f %5.3f",
MSAIndex+1, ColCount, N1, N1f, TotalConf, CC);
if (optset_ref)
ProgressLog(" %6.4f %6.4f", Qs[MSAIndex], TCs[MSAIndex]);
ProgressLog(" %s\n", Name.c_str());
}
// Median is taken as element MSACount/2 of the sorted values (the upper
// middle element when MSACount is even). Requires MSACount > 0.
sort(CCs.begin(), CCs.end());
double MedianCC = CCs[MSACount/2];
Progress("%u seqs, %u MSAs, avg cols %.1f, D_LP %.3g, D_Cols %.3g, CC %.3g",
SeqCount, MSACount, AvgCols, D_LetterPairs, D_Columns, MedianCC);
Log("@SUMMARY input=%s D_LP=%.4f D_Cols=%.4f CC=%.4f",
InputFileName.c_str(), D_LetterPairs, D_Columns, MedianCC);
if (optset_ref)
{
// Sorting happens after the per-MSA values were printed above.
sort(Qs.begin(), Qs.end());
sort(TCs.begin(), TCs.end());
asserta(SIZE(Qs) == MSACount);
asserta(SIZE(TCs) == MSACount);
double MedianQ = Qs[MSACount/2];
double MedianTC = TCs[MSACount/2];
double E_LP = 1 - MedianQ;
double E_Cols = 1 - MedianTC;
Progress(" E_LP %.4f, E_Cols %.4f", E_LP, E_Cols);
Log(" E_LP=%.4f E_Cols=%.4f", E_LP, E_Cols);
}
Progress("\n");
Log("\n");
}
muscle-5.1.0/src/ensemble.cpp 0000664 0000000 0000000 00000056123 14244530626 0016107 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "ensemble.h"
#include "qscorer.h"
// Opens FileName, reads one byte with ReadStdioFile() and returns it.
static char ReadFirstChar(const string &FileName)
{
    char First;

    FILE *File = OpenStdioFile(FileName);
    ReadStdioFile(File, &First, 1);
    CloseStdioFile(File);

    return First;
}
// Computes all derived data after the MSAs have been loaded (called at the
// end of FromEFA()). The steps run in this fixed order; SortMSAs() relies
// on the label map that MapLabels() builds from MSA 0.
void Ensemble::SetDerived()
{
ToUpper();
MapLabels();
SortMSAs();
SetUngappedSeqs();
SetColToPosVec();
SetColumns();
}
void Ensemble::MapLabels()
{
asserta(!m_MSAs.empty());
const MSA &M0 = *m_MSAs[0];
const uint SeqCount = M0.GetSeqCount();
M0.GetLabelToSeqIndex(m_Labels0, m_LabelToSeqIndex0);
asserta(SIZE(m_Labels0) == SeqCount);
}
// Reorders the rows of M so that its sequences appear in the same label
// order as the first MSA of the ensemble. M must not be that first MSA.
// Dies if M contains a label that the first MSA does not have.
void Ensemble::SortMSA(MSA &M)
{
const MSA &M0 = *m_MSAs[0];
asserta(&M != &M0);
const uint SeqCount = GetSeqCount();
map LabelToSeqIndex2;
vector Labels2;
M.GetLabelToSeqIndex(Labels2, LabelToSeqIndex2);
// New row-pointer array, indexed by the row position in MSA 0.
char **szSeqsSorted = myalloc(char *, SeqCount);
memset_zero(szSeqsSorted, SeqCount);
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
{
const string &Label = Labels2[SeqIndex];
map::const_iterator p = m_LabelToSeqIndex0.find(Label);
if (p == m_LabelToSeqIndex0.end())
Die("SortMSA, different labels (%s)", Label.c_str());
uint SeqIndex0 = p->second;
// Each target slot may be filled only once (no duplicate labels).
asserta(szSeqsSorted[SeqIndex0] == 0);
szSeqsSorted[SeqIndex0] = M.m_szSeqs[SeqIndex];
}
// NOTE(review): M now shares MSA 0's name array, and M's previous
// m_szNames / m_szSeqs arrays are not freed here -- confirm ownership.
M.m_szNames = M0.m_szNames;
M.m_szSeqs = szSeqsSorted;
// Re-read the labels to verify that M now matches MSA 0 exactly.
M.GetLabelToSeqIndex(Labels2, LabelToSeqIndex2);
asserta(Labels2 == m_Labels0);
asserta(LabelToSeqIndex2 == m_LabelToSeqIndex0);
}
// Brings every MSA after the first into the row order of the first MSA.
// Dies if an MSA does not have the same number of sequences as the first.
// An empty ensemble is a no-op (the loop body never runs).
void Ensemble::SortMSAs()
{
	const uint MSACount = GetMSACount();
	const uint SeqCount = GetSeqCount();
	// Index 0 is the reference ordering, so start at 1.
	for (uint MSAIndex = 1; MSAIndex < MSACount; ++MSAIndex)
	{
		MSA &M = *m_MSAs[MSAIndex];
		const uint SeqCount2 = M.GetSeqCount();
		if (SeqCount2 != SeqCount)
			Die("Bad ensemble, different nr seqs");
		SortMSA(M);
	}
}
// Loads an ensemble from an EFA file: a text file in which every line
// starting with '<' opens a new MSA (the rest of that line is its name)
// and the lines up to the next '<' are handed to MSA::FromStrings().
// Dies if the file is empty, does not start with '<', or the number of
// MSAs read differs from the number of names read.
// The MSA objects are allocated with new and stored as raw pointers in
// m_MSAs; nothing in this function releases them.
void Ensemble::FromEFA(const string &FN)
{
Clear();
vector Strings;
ReadStringsFromFile(FN, Strings);
if (Strings.empty())
Die("Empty EFA (%s)", FN.c_str());
if (Strings[0].c_str()[0] != '<')
Die("Invalid EFA, must start with '<' (%s)", FN.c_str());
// Lines of the MSA currently being collected.
vector MSAStrings;
for (uint i = 0; i < SIZE(Strings); ++i)
{
const string &s = Strings[i];
if (s.c_str()[0] == '<')
{
// A new header closes the MSA collected so far, if there is one.
// A header followed directly by another header produces a name
// without an MSA; the count check after the loop catches that.
if (!MSAStrings.empty())
{
MSA &M = *new MSA;
M.FromStrings(MSAStrings);
m_MSAs.push_back(&M);
MSAStrings.clear();
}
string MSAName = s.substr(1);
m_MSANames.push_back(MSAName);
}
else
MSAStrings.push_back(s);
}
// Flush the last MSA, which has no following header to close it.
MSA &M = *new MSA;
M.FromStrings(MSAStrings);
m_MSAs.push_back(&M);
if (SIZE(m_MSAs) != SIZE(m_MSANames))
Die("Invalid EFA, %u MSAs %u names (%s)",
SIZE(m_MSAs), SIZE(m_MSANames), FN.c_str());
SetDerived();
}
void Ensemble::ToEFA(const string &FN) const
{
if (FN.empty())
return;
FILE *f = CreateStdioFile(FN);
const uint MSACount = GetMSACount();
asserta(SIZE(m_MSANames) == MSACount);
for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
{
const string &MSAName = m_MSANames[MSAIndex];
fprintf(f, "<%s\n", MSAName.c_str());
const MSA &M = *m_MSAs[MSAIndex];
M.ToFASTAFile(f);
}
CloseStdioFile(f);
}
void Ensemble::FromFile(const string &FN)
{
char c = ReadFirstChar(FN);
if (c == '<')
FromEFA(FN);
else
FromMSAPaths(FN);
}
void Ensemble::FromMSAPaths(const string &FN)
{
Clear();
m_MSANames.clear();
ReadStringsFromFile(FN, m_MSANames);
const uint MSACount = SIZE(m_MSANames);
if (MSACount == 0)
{
Warning("Empty ensemble");
return;
}
for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
{
ProgressStep(MSAIndex, MSACount, "Reading m_MSAs");
const string &MSAFileName = m_MSANames[MSAIndex];
MSA *M = new MSA;
M->FromFASTAFile(MSAFileName);
m_MSAs.push_back(M);
}
if (opt(basename))
{
for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
{
const string &MSAFileName = m_MSANames[MSAIndex];
m_MSANames[MSAIndex] = string(BaseName(MSAFileName.c_str()));
}
}
if (opt(intsuffix))
{
for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
{
string Name = m_MSANames[MSAIndex];
Psa(Name, ".%u", MSAIndex);
m_MSANames[MSAIndex] = Name;
}
}
SetDerived();
}
// Number of sequences in the ensemble, taken from the first MSA;
// zero when no MSA is loaded.
uint Ensemble::GetSeqCount() const
{
	if (!m_MSAs.empty())
		return m_MSAs[0]->GetSeqCount();
	return 0;
}
// Converts every letter of every MSA to upper case, in place.
void Ensemble::ToUpper()
{
	const uint MSACount = GetMSACount();
	for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
	{
		MSA &M = *m_MSAs[MSAIndex];
		// Use this MSA's own row count: SetDerived() calls ToUpper()
		// before SortMSAs() has verified that all MSAs have the same
		// number of sequences as the first one.
		const uint SeqCount = M.GetSeqCount();
		const uint ColCount = M.GetColCount();
		for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
		{
			char *Seq = M.m_szSeqs[SeqIndex];
			for (uint i = 0; i < ColCount; ++i)
			{
				// toupper() is only defined for EOF and values
				// representable as unsigned char.
				Seq[i] = (char) toupper((unsigned char) Seq[i]);
			}
		}
	}
}
// Builds M from a list of unique-column indexes: output column k is a copy
// of the representative column (m_UniqueIxs[UniqueIxs[k]]) of the k-th
// listed unique column. Rows are labelled with the labels of the first MSA.
// M is cleared and resized to GetSeqCount() x SIZE(UniqueIxs).
void Ensemble::MakeResampledMSA(const vector &UniqueIxs, MSA &M) const
{
M.Clear();
const uint ColCount = SIZE(UniqueIxs);
const uint SeqCount = GetSeqCount();
M.SetSize(SeqCount, ColCount);
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
{
const string &Label = m_Labels0[SeqIndex];
M.m_szNames[SeqIndex] = mystrsave(Label.c_str());
}
for (uint ColIndex = 0; ColIndex < ColCount; ++ColIndex)
{
// UniqueIx -> Ix of its representative column -> column string.
uint UniqueIx = UniqueIxs[ColIndex];
asserta(UniqueIx < SIZE(m_UniqueIxs));
uint Ix = m_UniqueIxs[UniqueIx];
asserta(Ix < SIZE(m_ColumnStrings));
const string &ColumnString = m_ColumnStrings[Ix];
asserta(SIZE(ColumnString) == SeqCount);
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
{
char c = ColumnString[SeqIndex];
M.m_szSeqs[SeqIndex][ColIndex] = c;
}
}
}
// Collects, in increasing order, the unique-column indexes whose
// confidence (GetConf) is at least MinConf and whose representative
// column has a gap fraction of at most MaxGapFract.
// UniqueIxs is cleared first.
void Ensemble::GetHiQualUniqueIxs(double MaxGapFract, double MinConf,
vector &UniqueIxs) const
{
UniqueIxs.clear();
const uint N = SIZE(m_UniqueIxs);
for (uint UniqueIx = 0; UniqueIx < N; ++UniqueIx)
{
// Ix is the representative column of this unique column.
uint Ix = m_UniqueIxs[UniqueIx];
double Conf = GetConf(UniqueIx);
if (Conf < MinConf)
continue;
double GapFract = GetGapFract(Ix);
if (GapFract <= MaxGapFract)
UniqueIxs.push_back(UniqueIx);
}
}
// For each MSA counts the columns with confidence >= MinConf and gap
// fraction <= MaxGapFract, and returns the median of those counts
// (element MSACount/2 of the sorted counts, i.e. the upper middle element
// when the number of MSAs is even). Returns 0 for an empty ensemble.
uint Ensemble::GetMedianHiQualColCount(double MaxGapFract, double MinConf) const
{
vector ColCounts;
const uint MSACount = SIZE(m_MSAs);
if (MSACount == 0)
return 0;
for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
{
const MSA &M = *m_MSAs[MSAIndex];
const uint SeqCount = M.GetSeqCount();
const uint ColCount = M.GetColCount();
// Number of columns of this MSA that pass both filters.
uint NonGappyColCount = 0;
for (uint ColIndex = 0; ColIndex < ColCount; ++ColIndex)
{
double Conf = GetConf_MSACol(MSAIndex, ColIndex);
if (Conf < MinConf)
continue;
uint GapCount = M.GetGapCount(ColIndex);
double GapFract = double(GapCount)/double(SeqCount);
if (GapFract <= MaxGapFract)
++NonGappyColCount;
}
ColCounts.push_back(NonGappyColCount);
}
sort(ColCounts.begin(), ColCounts.end());
uint MedianColCount = ColCounts[MSACount/2];
return MedianColCount;
}
void Ensemble::SetUngappedSeqs()
{
m_UngappedSeqs.clear();
const MSA &M0 = *m_MSAs[0];
const uint SeqCount = M0.GetSeqCount();
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
{
string UngappedSeq;
M0.GetUngappedSeqStr(SeqIndex, UngappedSeq);
m_UngappedSeqs.push_back(UngappedSeq);
}
// Validate same seqs
const uint MSACount = GetMSACount();
for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
{
const MSA &M = *m_MSAs[MSAIndex];
asserta(M.GetSeqCount() == SeqCount);
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
{
asserta(strcmp(M0.m_szNames[SeqIndex], M.m_szNames[SeqIndex]) == 0);
string UngappedSeq;
M.GetUngappedSeqStr(SeqIndex, UngappedSeq);
if (UngappedSeq != m_UngappedSeqs[SeqIndex])
{
const uint L = SIZE(UngappedSeq);
const uint L2 = SIZE(m_UngappedSeqs[SeqIndex]);
Log(">%s\n", M0.m_szNames[SeqIndex]);
Log("%s\n", UngappedSeq.c_str());
Log("%s\n", m_UngappedSeqs[SeqIndex].c_str());
for (uint i = 0; i < max(L, L2); ++i)
{
if (i >= min(L, L2))
{
Log("*");
continue;
}
char c = UngappedSeq[i];
char c2 = m_UngappedSeqs[SeqIndex][i];
if (c == c2)
Log(" ");
else
Log("d");
}
Log("\n");
Die("MSA %u UngappedSeq != m_UngappedSeqs[%u]",
MSAIndex, SeqIndex);
}
}
}
}
void Ensemble::SetColToPosVec()
{
const uint MSACount = GetMSACount();
const uint SeqCount = GetSeqCount();
m_ColToPosVec.clear();
m_ColToPosVec.resize(MSACount);
for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
{
const MSA &M = *m_MSAs[MSAIndex];
m_ColToPosVec[MSAIndex].resize(SeqCount);
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
M.GetColToPos(SeqIndex, m_ColToPosVec[MSAIndex][SeqIndex]);
}
}
// Extracts one column of one MSA.
// ColStr receives the character of every row in that column; PosVec
// receives, per row, the entry of m_ColToPosVec for that column
// (UINT_MAX is treated below as "no position").
// Requires SetColToPosVec() and SetUngappedSeqs() to have run: where a
// position is present, the character is checked against m_UngappedSeqs.
void Ensemble::GetColumn(uint MSAIndex, uint ColIndex,
string &ColStr, vector &PosVec) const
{
ColStr.clear();
PosVec.clear();
const MSA &M = *m_MSAs[MSAIndex];
const uint SeqCount = GetSeqCount();
// '?' is a placeholder; the final loop asserts that every slot was overwritten.
ColStr.resize(SeqCount, '?');
PosVec.resize(SeqCount, UINT_MAX);
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
{
char c = M.GetChar(SeqIndex, ColIndex);
ColStr[SeqIndex] = c;
uint Pos = m_ColToPosVec[MSAIndex][SeqIndex][ColIndex];
PosVec[SeqIndex] = Pos;
if (Pos != UINT_MAX)
{
// The aligned letter must equal the letter at that position of the
// ungapped sequence.
const string &UngappedSeq = m_UngappedSeqs[SeqIndex];
asserta(Pos < SIZE(UngappedSeq));
char c2 = UngappedSeq[Pos];
asserta(c2 == c);
}
}
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
asserta(ColStr[SeqIndex] != '?');
}
void Ensemble::SetColumns()
{
m_ColumnStrings.clear();
m_ColumnPositions.clear();
m_IxToMSAIndex.clear();
m_IxToColIndex.clear();
const uint MSACount = GetMSACount();
const uint SeqCount = GetSeqCount();
if (MSACount == 0)
return;
for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
{
map LabelToSeqIndex2;
const MSA &M = *m_MSAs[MSAIndex];
uint SeqCount2 = M.GetSeqCount();
asserta(SeqCount2 == SeqCount);
const uint ColCount = M.GetColCount();
for (uint ColIndex = 0; ColIndex < ColCount; ++ColIndex)
{
string ColStr;
vector PosVec;
GetColumn(MSAIndex, ColIndex, ColStr, PosVec);
m_ColumnStrings.push_back(ColStr);
m_ColumnPositions.push_back(PosVec);
m_IxToMSAIndex.push_back(MSAIndex);
m_IxToColIndex.push_back(ColIndex);
}
}
SetUniqueColMap();
}
// Groups the columns enumerated by SetColumns() by their position vector.
// Two columns with equal position vectors belong to the same "unique
// column". Fills:
//   m_MSAColToIx     [MSA][Col] -> Ix
//   m_UniqueColMap   position vector -> UniqueIx
//   m_UniqueIxs      UniqueIx -> Ix of the first column seen with that vector
//   m_UniqueIxToIxs  UniqueIx -> all Ixs with that vector
//   m_IxToUniqueIx   Ix -> UniqueIx
// and validates the result.
void Ensemble::SetUniqueColMap()
{
m_UniqueIxs.clear();
m_UniqueIxToIxs.clear();
m_IxToUniqueIx.clear();
m_UniqueColMap.clear();
m_MSAColToIx.clear();
const uint MSACount = GetMSACount();
m_MSAColToIx.resize(MSACount);
for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
{
const MSA &M = *m_MSAs[MSAIndex];
uint ColCount = M.GetColCount();
// UINT_MAX marks "not assigned yet".
m_MSAColToIx[MSAIndex].resize(ColCount, UINT_MAX);
}
const vector Empty;
const uint N = SIZE(m_ColumnPositions);
for (uint Ix = 0; Ix < N; ++Ix)
{
uint MSAIndex = m_IxToMSAIndex[Ix];
uint ColIndex = m_IxToColIndex[Ix];
asserta(MSAIndex < MSACount);
asserta(ColIndex < SIZE(m_MSAColToIx[MSAIndex]));
m_MSAColToIx[MSAIndex][ColIndex] = Ix;
const vector &PosVec = m_ColumnPositions[Ix];
map, uint>::const_iterator p =
m_UniqueColMap.find(PosVec);
if (p == m_UniqueColMap.end())
{
// First time this position vector is seen: open a new unique
// column whose representative is this Ix.
uint UniqueIx = SIZE(m_UniqueIxs);
m_UniqueColMap[PosVec] = UniqueIx;
m_UniqueIxs.push_back(Ix);
asserta(SIZE(m_UniqueIxToIxs) == UniqueIx);
m_UniqueIxToIxs.push_back(Empty);
m_UniqueIxToIxs[UniqueIx].push_back(Ix);
m_IxToUniqueIx.push_back(UniqueIx);
}
else
{
// Known position vector: add this Ix to the existing group.
uint UniqueIx = p->second;
m_UniqueIxToIxs[UniqueIx].push_back(Ix);
m_IxToUniqueIx.push_back(UniqueIx);
}
}
ValidateUniqueColMap();
}
// Consistency check for one column of one MSA: its Ix must map to a
// unique column whose member list contains that Ix, and looking its
// position vector up in m_UniqueColMap must give the same unique column.
// NOTE(review): MSAIndex itself is not range-checked here; the visible
// caller (ValidateUniqueColMap) asserts it before calling.
void Ensemble::ValidateUniqueColMap1(uint MSAIndex, uint ColIndex) const
{
asserta(ColIndex < SIZE(m_MSAColToIx[MSAIndex]));
uint Ix = m_MSAColToIx[MSAIndex][ColIndex];
asserta(Ix < SIZE(m_ColumnPositions));
const vector &PosVec = m_ColumnPositions[Ix];
asserta(Ix < SIZE(m_IxToUniqueIx));
uint UniqueIx = m_IxToUniqueIx[Ix];
asserta(UniqueIx < SIZE(m_UniqueIxToIxs));
const vector &Ixs = m_UniqueIxToIxs[UniqueIx];
// Ix must be listed among the members of its unique column.
bool Found = false;
for (uint i = 0; i < SIZE(Ixs); ++i)
{
if (Ixs[i] == Ix)
{
Found = true;
break;
}
}
asserta(Found);
// The map keyed by position vector must agree with m_IxToUniqueIx.
map, uint>::const_iterator p =
m_UniqueColMap.find(PosVec);
asserta(p != m_UniqueColMap.end());
uint UniqueIx2 = p->second;
asserta(UniqueIx == UniqueIx2);
}
// Consistency check for one unique column: its representative's position
// vector must be a key of m_UniqueColMap, and every member Ix must map
// back to this unique column and carry the same position vector.
void Ensemble::ValidateUniqueIx(uint UniqueIx) const
{
asserta(UniqueIx < SIZE(m_UniqueIxs));
asserta(UniqueIx < SIZE(m_UniqueIxToIxs));
uint Ix = m_UniqueIxs[UniqueIx];
asserta(Ix < SIZE(m_ColumnPositions));
const vector &PosVec = m_ColumnPositions[Ix];
map, uint>::const_iterator p =
m_UniqueColMap.find(PosVec);
asserta(p != m_UniqueColMap.end());
asserta(p->first == PosVec);
const vector &Ixs = m_UniqueIxToIxs[UniqueIx];
for (uint i = 0; i < SIZE(Ixs); ++i)
{
uint Ix2 = Ixs[i];
asserta(Ix2 < SIZE(m_IxToUniqueIx));
uint UniqueIx2 = m_IxToUniqueIx[Ix2];
asserta(UniqueIx2 == UniqueIx);
const vector &PosVec2 = m_ColumnPositions[Ix2];
asserta(PosVec2 == PosVec);
}
}
// Runs the per-column check on every column of every MSA and the
// per-unique-column check on every unique column.
void Ensemble::ValidateUniqueColMap() const
{
	const uint MSACount = GetMSACount();
	for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
	{
		const uint ColCount = m_MSAs[MSAIndex]->GetColCount();
		asserta(MSAIndex < SIZE(m_MSAColToIx));
		uint ColIndex = 0;
		while (ColIndex < ColCount)
		{
			ValidateUniqueColMap1(MSAIndex, ColIndex);
			++ColIndex;
		}
	}

	const uint UniqueIxCount = SIZE(m_UniqueIxs);
	uint UniqueIx = 0;
	while (UniqueIx < UniqueIxCount)
	{
		ValidateUniqueIx(UniqueIx);
		++UniqueIx;
	}
}
// Fraction of rows that hold a gap character in column Ix
// (number of gaps divided by the number of sequences).
double Ensemble::GetGapFract(uint Ix) const
{
	asserta(Ix < SIZE(m_ColumnStrings));
	const string &ColStr = m_ColumnStrings[Ix];
	const uint SeqCount = GetSeqCount();
	asserta(SIZE(ColStr) == SeqCount);

	uint GapCount = 0;
	for (uint Row = 0; Row < SeqCount; ++Row)
	{
		const char c = ColStr[Row];
		if (isgap(c))
			++GapCount;
	}
	return double(GapCount)/SeqCount;
}
// Convenience overload: selects the columns whose gap fraction is at most
// MaxGapFract (GetIxSubset) and resamples from that set into M.
void Ensemble::SubsampleWithReplacement(double MaxGapFract,
uint ColCount, MSA &M) const
{
vector Ixs;
GetIxSubset(MaxGapFract, Ixs);
SubsampleWithReplacement(Ixs, ColCount, M);
}
void Ensemble::SubsampleWithReplacement(const vector &Ixs,
uint ColCount, MSA &M) const
{
asserta(ColCount > 0);
const uint SeqCount = GetSeqCount();
M.SetSize(SeqCount, ColCount);
asserta(SIZE(m_Labels0) == SeqCount);
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
{
const string &Label = m_Labels0[SeqIndex];
M.m_szNames[SeqIndex] = mystrsave(Label.c_str());
}
const uint N = SIZE(Ixs);
for (uint i = 0; i < N; ++i)
{
uint r = randu32()%N;
uint Ix = Ixs[r];
asserta(Ix < SIZE(m_ColumnStrings));
const string &ColStr = m_ColumnStrings[Ix];
asserta(SIZE(ColStr) == SeqCount);
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
M.m_szSeqs[SeqIndex][i] = ColStr[SeqIndex];
}
}
// Collects, in increasing order, the indexes (Ix) of all columns of the
// ensemble whose gap fraction is at most MaxGapFract. Ixs is cleared first.
void Ensemble::GetIxSubset(double MaxGapFract, vector &Ixs) const
{
Ixs.clear();
const uint IxCount = SIZE(m_ColumnStrings);
for (uint Ix = 0; Ix < IxCount; ++Ix)
{
double GapFract = GetGapFract(Ix);
if (GapFract <= MaxGapFract)
Ixs.push_back(Ix);
}
}
void Ensemble::GetAbToCountAll(vector &AbToCountAll)
{
const uint MSACount = GetMSACount();
AbToCountAll.clear();
AbToCountAll.resize(MSACount+1, 0);
for (uint MSAIndex = 0; MSAIndex < MSACount; ++MSAIndex)
{
vector AbToCount;
GetAbToCount(MSAIndex, AbToCount);
asserta(SIZE(AbToCount) == MSACount);
for (uint i = 0; i < MSACount; ++i)
AbToCountAll[i] += AbToCount[i];
}
}
// Histogram for one MSA: AbToCount[Ab] is the number of columns of that
// MSA for which GetAb() returns Ab (the number of columns in the ensemble
// sharing the column's position vector). The output has MSACount+1
// entries so that it can be indexed directly by Ab in 1..MSACount.
void Ensemble::GetAbToCount(uint MSAIndex, vector &AbToCount)
{
const uint MSACount = GetMSACount();
asserta(MSAIndex < MSACount);
AbToCount.clear();
AbToCount.resize(MSACount+1, 0);
const MSA &M = *m_MSAs[MSAIndex];
const uint ColCount = M.GetColCount();
for (uint Col = 0; Col < ColCount; ++Col)
{
uint Ab = GetAb(MSAIndex, Col);
// Every column is a member of its own group, and the code expects a
// group to hold at most one column per MSA.
asserta(Ab > 0);
asserta(Ab <= MSACount);
++AbToCount[Ab];
}
}
// Maps (MSA index, column index) to the ensemble-wide column index Ix.
uint Ensemble::GetIx(uint MSAIndex, uint ColIndex) const
{
	asserta(MSAIndex < SIZE(m_MSAColToIx));
	asserta(ColIndex < SIZE(m_MSAColToIx[MSAIndex]));
	return m_MSAColToIx[MSAIndex][ColIndex];
}
// Maps (MSA index, column index) to the index of the unique column
// that the column belongs to.
uint Ensemble::GetUniqueIx(uint MSAIndex, uint ColIndex) const
{
	const uint Ix = GetIx(MSAIndex, ColIndex);
	asserta(Ix < SIZE(m_IxToUniqueIx));
	return m_IxToUniqueIx[Ix];
}
double Ensemble::GetMedianConf(uint MSAIndex) const
{
asserta(MSAIndex < SIZE(m_MSAs));
const MSA &M = *m_MSAs[MSAIndex];
const uint ColCount = M.GetColCount();
vector Confs;
for (uint ColIndex = 0; ColIndex < ColCount; ++ColIndex)
{
uint Ix = m_MSAColToIx[MSAIndex][ColIndex];
asserta(Ix < SIZE(m_IxToUniqueIx));
uint UniqueIx = m_IxToUniqueIx[Ix];
double Conf = GetConf(UniqueIx);
Confs.push_back(Conf);
}
sort(Confs.begin(), Confs.end());
double MedianConf = Confs[ColCount/2];
return MedianConf;
}
// Sum of the column confidences of one MSA.
double Ensemble::GetTotalConf(uint MSAIndex) const
{
	asserta(MSAIndex < SIZE(m_MSAs));
	const MSA &M = *m_MSAs[MSAIndex];
	const uint ColCount = M.GetColCount();
	asserta(MSAIndex < SIZE(m_MSAColToIx));
	asserta(SIZE(m_MSAColToIx[MSAIndex]) == ColCount);

	double SumConf = 0;
	for (uint Col = 0; Col < ColCount; ++Col)
	{
		const uint Ix = m_MSAColToIx[MSAIndex][Col];
		asserta(Ix < SIZE(m_IxToUniqueIx));
		SumConf += GetConf(m_IxToUniqueIx[Ix]);
	}
	return SumConf;
}
// Confidence of one column of one MSA, i.e. GetConf() of the unique
// column that the column belongs to.
double Ensemble::GetConf_MSACol(uint MSAIndex, uint ColIndex) const
{
	asserta(MSAIndex < SIZE(m_MSAColToIx));
	asserta(ColIndex < SIZE(m_MSAColToIx[MSAIndex]));
	const uint Ix = m_MSAColToIx[MSAIndex][ColIndex];
	// Same range check as GetUniqueIx() / GetTotalConf() before indexing.
	asserta(Ix < SIZE(m_IxToUniqueIx));
	const uint UniqueIx = m_IxToUniqueIx[Ix];
	return GetConf(UniqueIx);
}
// Confidence of a unique column: the number of columns in the ensemble
// that share its position vector, divided by the number of MSAs.
double Ensemble::GetConf(uint UniqueIx) const
{
const uint MSACount = GetMSACount();
asserta(UniqueIx < SIZE(m_UniqueIxToIxs));
const vector &Ixs = m_UniqueIxToIxs[UniqueIx];
uint Ab = SIZE(Ixs);
double Conf = double(Ab)/MSACount;
return Conf;
}
// Number of columns in the ensemble that share the position vector of the
// given column (the size of its unique-column group). Always at least 1,
// because the column itself is a member of its group.
uint Ensemble::GetAb(uint MSAIndex, uint ColIndex) const
{
uint UniqueIx = GetUniqueIx(MSAIndex, ColIndex);
asserta(UniqueIx < SIZE(m_UniqueIxToIxs));
const vector &Ixs = m_UniqueIxToIxs[UniqueIx];
uint Ab = SIZE(Ixs);
asserta(Ab > 0);
return Ab;
}
// Number of columns of one MSA whose position vector occurs exactly once
// in the whole ensemble (GetAb() == 1).
uint Ensemble::GetN1(uint MSAIndex) const
{
	asserta(MSAIndex < SIZE(m_MSAs));
	const uint ColCount = m_MSAs[MSAIndex]->GetColCount();

	uint SingletonCount = 0;
	for (uint Col = 0; Col < ColCount; ++Col)
	{
		const bool IsSingleton = (GetAb(MSAIndex, Col) == 1);
		if (IsSingleton)
			++SingletonCount;
	}
	return SingletonCount;
}
void Ensemble::GetDispersion(double MaxGapFract,
double &D_LetterPairs, double &D_Columns) const
{
QScorer QS;
QS.m_MaxGapFract = MaxGapFract;
vector Qs;
vector TCs;
const uint MSACount = GetMSACount();
const uint PairCount = (MSACount*(MSACount - 1))/2;
uint PairIndex = 0;
for (uint i = 0; i < MSACount; ++i)
{
const MSA &MSAi = *m_MSAs[i];
const string &Namei = m_MSANames[i];
for (uint j = i + 1; j < MSACount; ++j)
{
ProgressStep(PairIndex++, PairCount, "Pairwise dists");
const MSA &MSAj = *m_MSAs[j];
const string &Namej = m_MSANames[j];
QS.Run(MSAi, MSAj);
double Qij = QS.m_Q;
double TCij = QS.m_TC;
QS.Run(MSAj, MSAi);
double Qji = QS.m_Q;
double TCji = QS.m_TC;
double Q = (Qij + Qji)/2;
double TC = (TCij + TCji)/2;
asserta(Q >= 0 && Q <= 1);
asserta(TC >= 0 && TC <= 1);
Qs.push_back(Q);
TCs.push_back(TC);
}
}
sort(Qs.begin(), Qs.end());
sort(TCs.begin(), TCs.end());
const uint N = SIZE(Qs);
asserta(SIZE(TCs) == N);
double MedianQ = Qs[N/2];
double MedianTC = TCs[N/2];
D_LetterPairs = 1.0 - MedianQ;
D_Columns = 1.0 - MedianTC;
asserta(D_LetterPairs >= 0 && D_LetterPairs <= 1);
asserta(D_Columns >= 0 && D_Columns <= 1);
}
void Ensemble::CheckRefMSA(const MSA &Ref) const
{
const uint SeqCount = Ref.GetSeqCount();
const uint RefSeqCount = Ref.GetSeqCount();
const uint RefColCount = Ref.GetColCount();
if (RefSeqCount != SeqCount)
Die("Different nr seqs");
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
{
const string Label = string(Ref.GetSeqName(SeqIndex));
if (Label != m_Labels0[SeqIndex])
Die("GetRefUniqueIxs, not sorted");
}
}
// Finds which unique columns of the ensemble also occur in a reference MSA.
// For every reference column for which Ref.ColIsUpper(col, MaxGapFract)
// is true, the column's position vector is built and looked up in
// m_UniqueColMap; the unique-column indexes of the hits are collected in
// UniqueIxs. Reference columns with no match are ignored.
void Ensemble::GetRefUniqueIxs(const MSA &Ref,
set &UniqueIxs, double MaxGapFract) const
{
UniqueIxs.clear();
CheckRefMSA(Ref);
const uint SeqCount = GetSeqCount();
const uint RefColCount = Ref.GetColCount();
// Column -> position table of every reference row.
vector > ColToPosVec(SeqCount);
for (uint RefSeqIndex = 0; RefSeqIndex < SeqCount; ++RefSeqIndex)
Ref.GetColToPos(RefSeqIndex, ColToPosVec[RefSeqIndex]);
for (uint RefColIndex = 0; RefColIndex < RefColCount; ++RefColIndex)
{
bool IsUpper = Ref.ColIsUpper(RefColIndex, MaxGapFract);
if (!IsUpper)
continue;
// Position vector of this reference column, one entry per row.
vector PosVec(SeqCount, UINT_MAX);
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
{
asserta(SeqIndex < SIZE(ColToPosVec));
asserta(RefColIndex < SIZE(ColToPosVec[SeqIndex]));
uint Pos = ColToPosVec[SeqIndex][RefColIndex];
PosVec[SeqIndex] = Pos;
}
map, uint >::const_iterator p =
m_UniqueColMap.find(PosVec);
if (p != m_UniqueColMap.end())
{
uint UniqueIx = p->second;
UniqueIxs.insert(UniqueIx);
}
}
}
// Collects the (sequence index, position) pairs covered by the reference
// columns for which Ref.ColIsUpper(col, MaxGapFract) is true.
// PosSet is cleared first.
// NOTE(review): the pair is inserted for every row without testing Pos,
// so whatever value GetColToPos() stores for gapped cells (other code in
// this file treats UINT_MAX as "no position") also ends up in the set.
void Ensemble::GetRefPosSet(const MSA &Ref, double MaxGapFract,
set > &PosSet) const
{
PosSet.clear();
CheckRefMSA(Ref);
const uint SeqCount = GetSeqCount();
const uint RefColCount = Ref.GetColCount();
vector > ColToPosVec(SeqCount);
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
Ref.GetColToPos(SeqIndex, ColToPosVec[SeqIndex]);
for (uint RefColIndex = 0; RefColIndex < RefColCount; ++RefColIndex)
{
bool IsUpper = Ref.ColIsUpper(RefColIndex, MaxGapFract);
if (!IsUpper)
continue;
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
{
uint Pos = ColToPosVec[SeqIndex][RefColIndex];
pair SeqPos(SeqIndex, Pos);
PosSet.insert(SeqPos);
}
}
}
// Selects the columns of one MSA that overlap a reference position set.
// A column is selected when the number of its rows whose
// (sequence, position) pair is in RefPosSet is at least SeqCount/2
// (integer division). For every selected column its unique-column index
// and confidence are appended to UniqueIxs and Confs, in column order.
// Both outputs are cleared first.
void Ensemble::GetTestUniqueIxs(uint MSAIndex,
const set > &RefPosSet, vector &UniqueIxs,
vector &Confs) const
{
UniqueIxs.clear();
Confs.clear();
const uint MSACount = GetMSACount();
const uint SeqCount = GetSeqCount();
asserta(MSAIndex < MSACount);
const MSA &M = *m_MSAs[MSAIndex];
const uint ColCount = M.GetColCount();
asserta(MSAIndex < SIZE(m_MSAColToIx));
asserta(SIZE(m_MSAColToIx[MSAIndex]) == ColCount);
for (uint ColIndex = 0; ColIndex < ColCount; ++ColIndex)
{
uint Ix = m_MSAColToIx[MSAIndex][ColIndex];
asserta(Ix < SIZE(m_ColumnPositions));
const vector &PosVec = m_ColumnPositions[Ix];
asserta(SIZE(PosVec) == SeqCount);
// Number of rows of this column whose letter is covered by the reference.
uint FoundCount = 0;
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
{
uint Pos = PosVec[SeqIndex];
// Rows without a position in this column are not counted.
if (Pos == UINT_MAX)
continue;
pair SeqPos(SeqIndex, Pos);
if (RefPosSet.find(SeqPos) != RefPosSet.end())
++FoundCount;
}
if (FoundCount >= SeqCount/2)
{
asserta(Ix < SIZE(m_IxToUniqueIx));
uint UniqueIx = m_IxToUniqueIx[Ix];
double Conf = GetConf(UniqueIx);
UniqueIxs.push_back(UniqueIx);
Confs.push_back(Conf);
}
}
}
// Read-only access to the MSA at the given index (range-checked).
const MSA &Ensemble::GetMSA(uint MSAIndex) const
{
	asserta(MSAIndex < SIZE(m_MSAs));
	const MSA *Aln = m_MSAs[MSAIndex];
	return *Aln;
}
// Name of the MSA at the given index (range-checked).
const string &Ensemble::GetMSAName(uint MSAIndex) const
{
	asserta(MSAIndex < SIZE(m_MSANames));
	const string &Name = m_MSANames[MSAIndex];
	return Name;
}
muscle-5.1.0/src/ensemble.h 0000664 0000000 0000000 00000006423 14244530626 0015552 0 ustar 00root root 0000000 0000000 #pragma once
#include
// A set of alternative multiple sequence alignments (MSAs) of the same
// sequences, plus tables derived from them that index every column of
// every MSA and group columns that align the same sequence positions.
//
// Index vocabulary used by the members and methods:
//   MSAIndex  index into m_MSAs
//   SeqIndex  row index, in the label order of the first MSA
//   ColIndex  column index within one MSA
//   Ix        index of a column in the ensemble-wide enumeration built by
//             SetColumns() (MSA order, then column order)
//   UniqueIx  index of a group of columns with equal position vectors
class Ensemble
{
public:
// The alignments; elements are dereferenced as pointers to MSA.
vector m_MSAs;
// One name per MSA, parallel to m_MSAs.
vector m_MSANames;
// Row labels of the first MSA, and label -> row index for them.
vector m_Labels0;
map m_LabelToSeqIndex0;
// Ungapped sequence of every row, taken from the first MSA.
vector m_UngappedSeqs;
// Per Ix: the characters of the column, one per row.
vector m_ColumnStrings;
// Per Ix: the position vector of the column, one entry per row
// (UINT_MAX is used for "no position").
vector > m_ColumnPositions;
// [MSAIndex][SeqIndex][ColIndex] -> position, as filled by MSA::GetColToPos().
vector > > m_ColToPosVec;
// Per Ix: the MSA and the column the Ix came from.
vector m_IxToMSAIndex;
vector m_IxToColIndex;
// [MSAIndex][ColIndex] -> Ix.
vector > m_MSAColToIx;
// Per UniqueIx: Ix of the first column seen with that position vector.
vector m_UniqueIxs;
// Per UniqueIx: all Ixs that have that position vector.
vector > m_UniqueIxToIxs;
// Per Ix: the UniqueIx of its group.
vector m_IxToUniqueIx;
// Position vector -> UniqueIx.
map, uint> m_UniqueColMap;
public:
// Empties the containers listed below.
// NOTE(review): m_UngappedSeqs is not cleared here (SetUngappedSeqs()
// clears it itself), and the MSA objects that m_MSAs points to are not
// deleted -- confirm that this is intended.
void Clear()
{
m_MSAs.clear();
m_MSANames.clear();
m_ColumnStrings.clear();
m_ColumnPositions.clear();
m_Labels0.clear();
m_LabelToSeqIndex0.clear();
m_ColToPosVec.clear();
m_IxToMSAIndex.clear();
m_IxToColIndex.clear();
m_UniqueIxToIxs.clear();
m_UniqueIxs.clear();
m_IxToUniqueIx.clear();
m_UniqueColMap.clear();
m_MSAColToIx.clear();
}
// Loading and saving.
void FromFile(const string &FileName);
void FromMSAPaths(const string &FileName);
void FromEFA(const string &FileName);
void ToEFA(const string &FileName) const;
// Rebuilds all derived tables from m_MSAs.
void SetDerived();
uint GetMSACount() const { return SIZE(m_MSAs); }
uint GetIxCount() const { return SIZE(m_IxToMSAIndex); }
uint GetSeqCount() const;
void SetColumns();
void GetColumn(uint MSAIndex, uint ColIndex,
string &ColStr, vector &ColPos) const;
// Column selection and resampling.
void GetIxSubset(double MaxGapFract, vector &Ixs) const;
double GetGapFract(uint Ix) const;
void SubsampleWithReplacement(double MaxGapFract,
uint ColCount, MSA &M) const;
void SubsampleWithReplacement(const vector &Ixs,
uint ColCount, MSA &M) const;
// Group sizes ("Ab") and confidences of columns.
void GetAbToCountAll(vector &AbToCount);
void GetAbToCount(uint MSAIndex, vector &AbToCount);
uint GetUniqueIx(uint MSAIndex, uint ColIndex) const;
uint GetIx(uint MSAIndex, uint ColIndex) const;
uint GetAb(uint MSAIndex, uint ColIndex) const;
double GetConf(uint UniqueIx) const;
double GetConf_MSACol(uint MSAIndex, uint ColIndex) const;
uint GetN1(uint MSAIndex) const;
// Internal consistency checks of the unique-column tables.
void ValidateUniqueColMap() const;
void ValidateUniqueColMap1(uint MSAIndex, uint ColIndex) const;
void ValidateUniqueIx(uint UniqueIx) const;
void GetDispersion(double MaxGapFract,
double &D_LetterPairs, double &D_Columns) const;
double GetTotalConf(uint MSAIndex) const;
double GetMedianConf(uint MSAIndex) const;
void SortMSA(MSA &M);
// Comparison against a reference MSA.
void CheckRefMSA(const MSA &Ref) const;
void GetRefPosSet(const MSA &Ref, double MaxGapFract,
set > &PosSet) const;
void GetTestUniqueIxs(uint MSAIndex,
const set > &RefPosSet, vector &UniqueIxs,
vector &Confs) const;
void GetRefUniqueIxs(const MSA &Ref, set &UniqueIxs,
double MaxGapFract) const;
void MakeResampledMSA(const vector &UniqueIxs, MSA &M) const;
uint GetMedianHiQualColCount(double MaxGapFract, double MinConf) const;
void GetHiQualUniqueIxs(double MaxGapFract, double MinConf,
vector &UniqueIxs) const;
const MSA &GetMSA(uint MSAIndex) const;
const string &GetMSAName(uint MSAIndex) const;
void GetLetterConfsVec(const MSA &Ref, double MaxGapFract,
vector > &LetterConfsVec) const;
private:
// Steps of SetDerived().
void MapLabels();
void SortMSAs();
void ToUpper();
void SetColToPosVec();
void SetUngappedSeqs();
void SetUniqueColMap();
};
muscle-5.1.0/src/fa2efa.cpp 0000664 0000000 0000000 00000000565 14244530626 0015440 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "ensemble.h"
void cmd_fa2efa()
{
const string &InputFileName = opt(fa2efa);
const string &OutputFileName = opt(output);
Ensemble E;
E.FromMSAPaths(InputFileName);
Progress("%u seqs, %u MSAs\n", E.GetSeqCount(), E.GetMSACount());
Progress("Writing %s ...\n", OutputFileName.c_str());
E.ToEFA(OutputFileName);
Progress("done.\n");
}
muscle-5.1.0/src/fasta.cpp 0000664 0000000 0000000 00000006340 14244530626 0015407 0 ustar 00root root 0000000 0000000 #include "muscle.h"
// Maximum number of sequence characters per line written by
// MSA::ToFASTAFile(TextFile &).
const unsigned FASTA_BLOCK = 60;
// Replaces the contents of this MSA with the sequences read from an open
// FASTA file. Records are read with GetFastaSeq() until it returns null;
// gap characters are kept (DeleteGaps = false).
void MSA::FromFASTAFile(TextFile &File)
{
	// Defined in another source file; declared once here, not per iteration.
	char *GetFastaSeq(FILE *f, unsigned *ptrSeqLength, char **ptrLabel, bool DeleteGaps);

	Clear();
	FILE *f = File.GetStdioFile();
	for (;;)
	{
		// Initialised so they hold defined values even when no record is read.
		char *Label = 0;
		unsigned uSeqLength = 0;
		char *SeqData = GetFastaSeq(f, &uSeqLength, &Label, false);
		if (0 == SeqData)
			break;
		AppendSeq(SeqData, uSeqLength, Label);
	}
}
// Reads a FASTA file without converting letters to upper case.
// The global g_FASTA_Upper flag is switched off for the duration of the
// read and then put back to the value it had on entry.
void MSA::FromFASTAFile_PreserveCase(const string &FileName)
{
	extern bool g_FASTA_Upper;
	const bool SaveUpper = g_FASTA_Upper;
	g_FASTA_Upper = false;
	FromFASTAFile(FileName);
	// Restore the caller's setting rather than forcing it to true.
	g_FASTA_Upper = SaveUpper;
}
void MSA::FromStrings(const vector &Strings)
{
Clear();
if (Strings.empty())
Die("MSA::FromStrings, no data");
vector Labels;
vector Seqs;
string CurrSeq;
for (uint i = 0; i < SIZE(Strings); ++i)
{
const string &s = Strings[i];
char s0 = s.c_str()[0];
if (s0 == '>')
{
if (!Labels.empty())
Seqs.push_back(CurrSeq);
Labels.push_back(s.substr(1));
CurrSeq.clear();
}
else
{
for (uint i = 0; i < SIZE(s); ++i)
{
char c = s[i];
if (!isspace(c))
CurrSeq.push_back(c);
}
}
}
Seqs.push_back(CurrSeq);
FromStrings2(Labels, Seqs);
}
// Builds this MSA from parallel vectors of labels and aligned sequences.
// Dies if the two vectors differ in size, if they are empty, or if any
// sequence has a different length from the first one.
void MSA::FromStrings2(const vector &Labels, vector &Seqs)
{
const uint SeqCount = SIZE(Labels);
if (SIZE(Seqs) != SeqCount)
Die("Invalid FASTA, %u labels %u seqs", SIZE(Labels), SIZE(Seqs));
if (SeqCount == 0)
Die("Empty FASTA");
// The length of the first sequence defines the column count.
const uint ColCount = SIZE(Seqs[0]);
SetSize(SeqCount, ColCount);
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
{
const char *Label = Labels[SeqIndex].c_str();
const string &Str = Seqs[SeqIndex];
const uint n = SIZE(Str);
if (n != ColCount)
Die("MSA not aligned, seq lengths %u, %u", ColCount, n);
const char *S = Str.c_str();
// NOTE(review): the row pointers are overwritten with fresh copies;
// whether SetSize() already allocated row buffers that are replaced
// here is not visible in this file -- confirm there is no leak.
m_szNames[SeqIndex] = mystrsave(Label);
m_szSeqs[SeqIndex] = mystrsave(S);
}
}
void MSA::FromFASTAFile(const string &FileName)
{
Clear();
TextFile TF(FileName);
FromFASTAFile(TF);
TF.Close();
}
void MSA::ToFASTAFile(const string &FileName) const
{
if (FileName.empty())
return;
TextFile TF(FileName, true);
ToFASTAFile(TF);
TF.Close();
}
void MSA::ToFASTAFile(FILE *f) const
{
if (f == 0)
return;
for (uint SeqIndex = 0; SeqIndex < m_uSeqCount; ++SeqIndex)
{
const byte *S = (const byte *) m_szSeqs[SeqIndex];
const char *Label = m_szNames[SeqIndex];
SeqToFasta(f, S, m_uColCount, Label);
}
}
// Writes this MSA to a TextFile in FASTA format: for every row a ">label"
// line followed by the row's characters in lines of at most FASTA_BLOCK
// characters. For an MSA with zero columns only the label lines are
// written. Deriving the line count from (ColCount - 1) would wrap around
// for unsigned zero and read far outside the rows whenever assert() is
// compiled out.
void MSA::ToFASTAFile(TextFile &File) const
{
	const unsigned uColCount = GetColCount();
	const unsigned uSeqCount = GetSeqCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
	{
		File.PutString(">");
		File.PutString(GetSeqName(uSeqIndex));
		File.PutString("\n");

		// n is the next column to write; each pass emits one line.
		unsigned n = 0;
		while (n < uColCount)
		{
			unsigned uLetters = uColCount - n;
			if (uLetters > FASTA_BLOCK)
				uLetters = FASTA_BLOCK;
			for (unsigned i = 0; i < uLetters; ++i)
			{
				char c = GetChar(uSeqIndex, n);
				File.PutChar(c);
				++n;
			}
			File.PutChar('\n');
		}
	}
}
muscle-5.1.0/src/fasta2.cpp 0000664 0000000 0000000 00000004664 14244530626 0015500 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include
#include
// Number of bytes by which the ADD macro grows its buffer each time it is full.
const int BUFFER_BYTES = 16*1024;
//const int BUFFER_BYTES = 128;
// Line terminator characters recognised by GetFastaSeq().
const int CR = '\r';
const int NL = '\n';
// When true, GetFastaSeq() converts alphabetic sequence characters to upper case.
bool g_FASTA_Upper = true;
// When true, GetFastaSeq() keeps digits found in sequence data.
bool g_FASTA_AllowDigits = false;
// Appends one character to a growable buffer.
// Expects three variables in the expanding scope:
//   char *Buffer            current storage (may be null while empty)
//   unsigned BufferLength   capacity of Buffer in bytes
//   unsigned Pos            number of bytes used so far
// When the buffer is full it is re-allocated BUFFER_BYTES larger and the
// old contents are copied over. The copy is skipped while Buffer is still
// null, because memcpy() must not be given a null pointer even for a
// length of zero. The expansion is a brace block, so it may be used with
// or without a trailing semicolon.
#define ADD(c) \
	{ \
	if (Pos >= BufferLength) \
		{ \
		const unsigned NewBufferLength = BufferLength + (unsigned) BUFFER_BYTES; \
		char *NewBuffer = new char[NewBufferLength]; \
		if (Buffer != 0) \
			memcpy(NewBuffer, Buffer, BufferLength); \
		delete[] Buffer; \
		Buffer = NewBuffer; \
		BufferLength = NewBufferLength; \
		} \
	Buffer[Pos++] = (char) (c); \
	}
// Get next sequence from file.
//
// Reads one FASTA record from f.
//   f             open stream positioned at the start of a record or at EOF
//   ptrSeqLength  receives the number of characters in the returned buffer
//   ptrLabel      receives a new[]-allocated, nul-terminated copy of the label
//   DeleteGaps    when true, gap characters are dropped from the sequence
// Returns a new[]-allocated buffer with the sequence characters (it is not
// nul-terminated; use *ptrSeqLength), or null at end of file.
// Records with no sequence characters are skipped. White space in the
// sequence is ignored; letters are upper-cased when g_FASTA_Upper is set;
// digits are kept only when g_FASTA_AllowDigits is set; any other
// character is reported with a warning and ignored.
// Dies on a missing '>', on end of file inside a label, on a '>' that is
// not at the start of a line, and on read errors.
char *GetFastaSeq(FILE *f, unsigned *ptrSeqLength, char **ptrLabel, bool DeleteGaps)
{
	unsigned BufferLength = 0;
	unsigned Pos = 0;
	char *Buffer = 0;

	int c = fgetc(f);
	if (EOF == c)
		return 0;
	if ('>' != c)
		Die("Invalid file format, expected '>' to start FASTA label");

	for (;;)
	{
		c = fgetc(f);
		if (EOF == c)
			Die("End-of-file or input error in FASTA label");

		// NL or CR terminates label
		if (NL == c || CR == c)
			break;

		// All other characters added to label
		ADD(c)
	}

	// Nul-terminate label
	ADD(0)
	*ptrLabel = Buffer;

	// Start a fresh buffer for the sequence data.
	BufferLength = 0;
	Pos = 0;
	Buffer = 0;
	// Last character accepted; used to check that '>' starts a line.
	// Characters that are warned about and ignored do not update it.
	int PreviousChar = NL;
	for (;;)
	{
		c = fgetc(f);
		if (EOF == c)
		{
			if (feof(f))
				break;
			else if (ferror(f))
				Die("Error reading FASTA file, ferror=TRUE feof=FALSE errno=%d %s",
				  errno, strerror(errno));
			else
				Die("Error reading FASTA file, fgetc=EOF feof=FALSE ferror=FALSE errno=%d %s",
				  errno, strerror(errno));
		}
		if ('>' == c)
		{
			if (NL == PreviousChar || CR == PreviousChar)
			{
				// Start of the next record: leave it for the next call.
				ungetc(c, f);
				break;
			}
			else
				Die("Unexpected '>' in FASTA sequence data");
		}
		else if (isspace(c))
			;
		else if (IsGapChar(c))
		{
			if (!DeleteGaps)
			{
				ADD(c);
			}
		}
		else if (isalpha(c))
		{
			if (g_FASTA_Upper)
				c = toupper(c);
			ADD(c)
		}
		else if (g_FASTA_AllowDigits && isdigit(c))
		{
			ADD(c);
		}
		else if (isprint(c))
		{
			Warning("Invalid character '%c' in FASTA sequence data, ignored", c);
			continue;
		}
		else
		{
			Warning("Invalid byte hex %02x in FASTA sequence data, ignored", (unsigned char) c);
			continue;
		}
		PreviousChar = c;
	}

	if (0 == Pos)
	{
		// Empty record: skip it and move on to the next one. Its label
		// is released first, since the caller never receives this record
		// and the recursive call overwrites *ptrLabel.
		delete[] *ptrLabel;
		*ptrLabel = 0;
		return GetFastaSeq(f, ptrSeqLength, ptrLabel, DeleteGaps);
	}
	*ptrSeqLength = Pos;
	return Buffer;
}
muscle-5.1.0/src/filebuffer.h 0000664 0000000 0000000 00000005117 14244530626 0016070 0 ustar 00root root 0000000 0000000 /////////////////////////////////////////////////////////////////
// filebuffer.h
//
// Buffered file reading.
/////////////////////////////////////////////////////////////////
#ifndef FILEBUFFER_H
#define FILEBUFFER_H
#include
#include
#include
#include
using namespace std;
const int BufferSize = 1000;
/////////////////////////////////////////////////////////////////
// FileBuffer
//
// Class for buffering file reading.
/////////////////////////////////////////////////////////////////
class FileBuffer {
	ifstream file;
	char buffer[BufferSize];
	int currPos;
	int size;
	bool isEOF;
	bool isValid;
	bool canUnget;

	/////////////////////////////////////////////////////////////////
	// FileBuffer::Refill()
	//
	// Reload the buffer from the file. Returns true if and only if
	// at least one character was read; otherwise marks end-of-file.
	/////////////////////////////////////////////////////////////////

	bool Refill (){
		file.read (buffer, BufferSize);
		size = int(file.gcount());
		isEOF = (size == 0);
		currPos = 0;
		return !isEOF;
	}

public:

	// Open the named file; fail() reports whether that worked.
	FileBuffer (const char *filename) : file (filename), currPos (0), size (0), isEOF (false), isValid (!file.fail()), canUnget (false){}

	// Close the file when the buffer goes away.
	~FileBuffer (){ close(); }

	// True if the file could not be opened or has been closed.
	bool fail () const { return !isValid; }

	// True if no more characters can be read.
	bool eof () const { return (!isValid || isEOF); }

	// Close the file; the buffer is unusable afterwards.
	void close(){ file.close(); isValid = false; }

	/////////////////////////////////////////////////////////////////
	// FileBuffer::Get()
	//
	// Retrieve a character from the file buffer. Returns true if
	// and only if a character is read.
	/////////////////////////////////////////////////////////////////

	bool Get (char &ch){

		// nothing to deliver from an invalid or exhausted file
		if (!isValid || isEOF) return false;

		// buffer used up: reload it, and stop if the file is exhausted
		if (currPos == size && !Refill()) return false;

		// hand out the next character
		ch = buffer[currPos];
		++currPos;
		canUnget = true;
		return true;
	}

	/////////////////////////////////////////////////////////////////
	// FileBuffer::UnGet()
	//
	// Unretrieve the most recently read character from the file
	// buffer. Note that this allows only a one-level undo.
	/////////////////////////////////////////////////////////////////

	void UnGet (){
		assert (canUnget);
		assert (isValid);
		assert (currPos > 0);
		--currPos;
		assert (currPos < size);
		isEOF = false;
		canUnget = false;
	}

	/////////////////////////////////////////////////////////////////
	// FileBuffer::GetLine()
	//
	// Retrieve characters of text until a newline character is
	// encountered. Terminates properly on end-of-file condition.
	/////////////////////////////////////////////////////////////////

	void GetLine (string &s){
		s = "";
		char ch;
		while (Get (ch)){
			if (ch == '\n') break;
			s += ch;
		}
	}
};
#endif
muscle-5.1.0/src/flatmx.h 0000664 0000000 0000000 00000000666 14244530626 0015256 0 ustar 00root root 0000000 0000000 #pragma once
// Inverse of FlatIx(): split flat index Ix into state s, row i and
// column j, for a matrix whose rows hold LY+1 cells of HMMSTATE_COUNT
// states each.
static void FlatCoords(uint Ix, uint LY, uint &s, uint &i, uint &j)
{
	const uint RowLength = LY + 1;
	const uint State = Ix%HMMSTATE_COUNT;
	const uint Cell = Ix/HMMSTATE_COUNT; // i*(LY+1) + j

	s = State;
	j = Cell%RowLength;
	i = Cell/RowLength;
}
// Flat index of state s at cell (i, j): cells are laid out row by row,
// LY+1 cells per row, HMMSTATE_COUNT consecutive states per cell.
static inline uint FlatIx(uint s, uint i, uint j, uint LY)
{
	const uint Cell = i*(LY+1) + j;
	return HMMSTATE_COUNT*Cell + s;
}
// Overload taking the state as an HMMSTATE value; forwards to the
// uint version.
static inline uint FlatIx(HMMSTATE s, uint i, uint j, uint LY)
{
	return FlatIx(static_cast<uint>(s), i, j, LY);
}
muscle-5.1.0/src/fwdflat3.cpp 0000664 0000000 0000000 00000007672 14244530626 0016034 0 ustar 00root root 0000000 0000000 #include "muscle.h"
/***
Fwd[s][i][j] =
probability of aligning
first i letters of X to
first j letters of Y and
ending in state s.
***/
// Fill Flat with the forward matrix Fwd[s][i][j] for sequences X (LX
// letters) and Y (LY letters).
//
// Layout: the value for state s at cell (i, j), 0 <= i <= LX and
// 0 <= j <= LY, is stored at Flat[HMMSTATE_COUNT*(i*(LY+1) + j) + s].
// Assuming every HMMSTATE_* constant is below HMMSTATE_COUNT, Flat must
// therefore hold at least HMMSTATE_COUNT*(LX+1)*(LY+1) floats.
//
// InsScore, MatchScore and the t?? transition terms are not declared in
// this function; presumably they are supplied by hmmscores.h, which is
// included inside the body -- confirm. Terms are combined with + and
// LOG_ADD, so values are presumably log-space scores -- confirm.
//
// NOTE(review): X[0] and Y[0] are read unconditionally, so this appears
// to assume LX >= 1 and LY >= 1 -- confirm callers never pass empty
// sequences.
// NOTE(review): letters are held in plain char before indexing the score
// tables; presumably letter codes are small non-negative values -- confirm.
void CalcFwdFlat(const byte *X, uint LX, const byte *Y, uint LY, float *Flat)
{
#include "hmmscores.h"
// Scores involving the first letter of each sequence.
char x0 = X[0];
char y0 = Y[0];
float Ins_x0 = InsScore[x0];
float Ins_y0 = InsScore[y0];
float Emit_x0_y0 = MatchScore[x0][y0];
// Offsets of the four corner cells and the strides for one step in i
// (a full row of LY+1 cells) and one step in j (a single cell).
const uint LY1 = LY+1;
const uint Base_0_0 = HMMSTATE_COUNT*(0*(LY1) + 0);
const uint Base_1_1 = HMMSTATE_COUNT*(1*(LY1) + 1);
const uint Base_1_0 = HMMSTATE_COUNT*(1*(LY1) + 0);
const uint Base_0_1 = HMMSTATE_COUNT*(0*(LY1) + 1);
const uint BaseInc_i = HMMSTATE_COUNT*LY1;
const uint BaseInc_j = HMMSTATE_COUNT;
// Cell (0,0): every state is set to LOG_ZERO.
Flat[Base_0_0 + HMMSTATE_M] = LOG_ZERO; // M(0,0)
Flat[Base_0_0 + HMMSTATE_IX] = LOG_ZERO; // IX(0,0)
Flat[Base_0_0 + HMMSTATE_JX] = LOG_ZERO; // JX(0,0)
Flat[Base_0_0 + HMMSTATE_IY] = LOG_ZERO; // IY(0,0)
Flat[Base_0_0 + HMMSTATE_JY] = LOG_ZERO; // JY(0,0)
// Start transitions: M(1,1), IX/JX(1,0) and IY/JY(0,1).
Flat[Base_1_1 + HMMSTATE_M] = tSM + Emit_x0_y0;
Flat[Base_1_0 + HMMSTATE_IX] = tSI + Ins_x0;
Flat[Base_1_0 + HMMSTATE_JX] = tSJ + Ins_x0;
Flat[Base_0_1 + HMMSTATE_IY] = tSI + Ins_y0;
Flat[Base_0_1 + HMMSTATE_JY] = tSJ + Ins_y0;
// Column j == 0, rows 1..LX: M, IY and JY are LOG_ZERO.
uint Base = Base_1_0;
for (uint i = 1; i <= LX; ++i)
{
Flat[Base + HMMSTATE_M] = LOG_ZERO;
Flat[Base + HMMSTATE_IY] = LOG_ZERO;
Flat[Base + HMMSTATE_JY] = LOG_ZERO;
Base += BaseInc_i;
}
// Row i == 0, columns 1..LY: M, IX and JX are LOG_ZERO.
Base = Base_0_1;
for (uint j = 1; j <= LY; ++j)
{
Flat[Base + HMMSTATE_M] = LOG_ZERO;
Flat[Base + HMMSTATE_IX] = LOG_ZERO;
Flat[Base + HMMSTATE_JX] = LOG_ZERO;
Base += BaseInc_j;
}
// Column j == 0: extend IX and JX from row i to row i+1 using X[i].
Base = Base_1_0;
uint NextBase = Base + BaseInc_i;
for (uint i = 1; i < LX; ++i)
{
char x = X[i];
float Emit_x = InsScore[x];
Flat[NextBase + HMMSTATE_IX] = Flat[Base + HMMSTATE_IX] + tII + Emit_x;
Flat[NextBase + HMMSTATE_JX] = Flat[Base + HMMSTATE_JX] + tJJ + Emit_x;
Base = NextBase;
NextBase += BaseInc_i;
}
// Row i == 0: extend IY and JY from column j to column j+1 using Y[j].
Base = Base_0_1;
NextBase = Base + BaseInc_j;
for (uint j = 1; j < LY; ++j)
{
char y = Y[j];
float Emit_y = InsScore[y];
Flat[NextBase + HMMSTATE_IY] = Flat[Base + HMMSTATE_IY] + tII + Emit_y;
Flat[NextBase + HMMSTATE_JY] = Flat[Base + HMMSTATE_JY] + tJJ + Emit_y;
Base = NextBase;
NextBase += BaseInc_j;
}
// Interior cells. Four running offsets track (i,j), (i-1,j), (i,j-1)
// and (i-1,j-1); all four advance together.
uint Base_i_j = Base_1_1;
uint Base_i1_j = Base_0_1;
uint Base_i_j1 = Base_1_0;
uint Base_i1_j1 = Base_0_0;
for (uint i = 1; i <= LX; ++i)
{
char x = X[i-1];
float Emit_x = InsScore[x];
for (uint j = 1; j <= LY; ++j)
{
char y = Y[j-1];
float Emit_y = InsScore[y];
float Emit_Pair = MatchScore[x][y];
// M(1,1) comes from the start state; this re-stores the value already
// written during initialization above.
if (i == 1 && j == 1)
Flat[Base_1_1 + HMMSTATE_M] = tSM + Emit_x0_y0;
else
{
// M(i,j): combine all five states at (i-1,j-1), then add the pair score.
float M_M = Flat[Base_i1_j1 + HMMSTATE_M] + tMM;
float IX_M = Flat[Base_i1_j1 + HMMSTATE_IX] + tIM;
float JX_M = Flat[Base_i1_j1 + HMMSTATE_JX] + tJM;
float IY_M = Flat[Base_i1_j1 + HMMSTATE_IY] + tIM;
float JY_M = Flat[Base_i1_j1 + HMMSTATE_JY] + tJM;
float SumPrev = LOG_ADD(M_M, IX_M, JX_M, IY_M, JY_M);
Flat[Base_i_j + HMMSTATE_M] = SumPrev + Emit_Pair;
}
float PrevM_i1_j = Flat[Base_i1_j + HMMSTATE_M];
float PrevM_i_j1 = Flat[Base_i_j1 + HMMSTATE_M];
// IX(i,j) and JX(i,j): from M or the same state at (i-1,j), scored with x.
float M_IX = PrevM_i1_j + tMI;
float IX_IX = Flat[Base_i1_j + HMMSTATE_IX] + tII;
Flat[Base_i_j + HMMSTATE_IX] = LOG_ADD(IX_IX, M_IX) + Emit_x;
float M_JX = PrevM_i1_j + tMJ;
float JX_JX = Flat[Base_i1_j + HMMSTATE_JX] + tJJ;
Flat[Base_i_j + HMMSTATE_JX] = LOG_ADD(JX_JX, M_JX) + Emit_x;
// IY(i,j) and JY(i,j): from M or the same state at (i,j-1), scored with y.
float M_IY = PrevM_i_j1 + tMI;
float IY_IY = Flat[Base_i_j1 + HMMSTATE_IY] + tII;
Flat[Base_i_j + HMMSTATE_IY] = LOG_ADD(IY_IY, M_IY) + Emit_y;
float M_JY = PrevM_i_j1 + tMJ;
float JY_JY = Flat[Base_i_j1 + HMMSTATE_JY] + tJJ;
Flat[Base_i_j + HMMSTATE_JY] = LOG_ADD(JY_JY, M_JY) + Emit_y;
Base_i_j += BaseInc_j;
Base_i1_j += BaseInc_j;
Base_i_j1 += BaseInc_j;
Base_i1_j1 += BaseInc_j;
}
// The inner loop advanced LY cells but a row holds LY+1; one more step
// skips the j == 0 cell so the offsets line up with j == 1 of the next row.
Base_i_j += BaseInc_j;
Base_i1_j += BaseInc_j;
Base_i_j1 += BaseInc_j;
Base_i1_j1 += BaseInc_j;
}
}
muscle-5.1.0/src/getconsseq.cpp 0000664 0000000 0000000 00000003052 14244530626 0016461 0 ustar 00root root 0000000 0000000 #include "muscle.h"
static char GetConsChar(const MultiSequence &MSA, uint ColIndex)
{
asserta(g_AlphaSize == 4 || g_AlphaSize == 20);
vector Counts(g_AlphaSize+1);
const uint ColCount = MSA.GetColCount();
const uint SeqCount = MSA.GetSeqCount();
for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
{
char c = MSA.GetChar(SeqIndex, ColIndex);
if (isgap(c))
{
++(Counts[g_AlphaSize]);
continue;
}
uint Letter = CharToLetter(c);
if (Letter < g_AlphaSize)
++(Counts[Letter]);
}
uint MaxCount = 0;
uint MaxLetter = 0;
for (uint Letter = 0; Letter <= g_AlphaSize; ++Letter)
{
uint Count = Counts[Letter];
if (Count > MaxCount)
{
MaxCount = Count;
MaxLetter = Letter;
}
}
if (MaxLetter == g_AlphaSize)
return '-';
char ConsChar = LetterToChar(MaxLetter);
return ConsChar;
}
// Build the ungapped consensus of MSA in Seq: one consensus character per
// column, left to right, skipping columns whose consensus is a gap ('-').
// Seq is cleared first.
void GetConsensusSequence(const MultiSequence &MSA, string &Seq)
{
	Seq.clear();
	const uint ColCount = MSA.GetColCount();
	for (uint ColIndex = 0; ColIndex < ColCount; ++ColIndex)
	{
		char c = GetConsChar(MSA, ColIndex);
		if (c != '-')
			Seq += c;
	}
}
void cmd_consseq()
{
const string &MSAFileName = opt(consseq);
const string &OutputFileName = opt(output);
string Label = "CONSENSUS";
if (optset_label)
Label = opt(label);
MultiSequence MSA;
MSA.FromFASTA(MSAFileName);
string ConsSeq;
GetConsensusSequence(MSA, ConsSeq);
FILE *fOut = CreateStdioFile(OutputFileName);
SeqToFasta(fOut, ConsSeq, Label);
CloseStdioFile(fOut);
}
muscle-5.1.0/src/getpairs.cpp 0000664 0000000 0000000 00000002747 14244530626 0016136 0 ustar 00root root 0000000 0000000 #include "muscle.h"
// Enumerate every unordered pair (i, j) with 0 <= i < j < Count.
// On return Indexes1[k], Indexes2[k] is the k'th pair, ordered by i then
// j. Both vectors are cleared first.
void GetAllPairs(uint Count,
  vector<uint> &Indexes1, vector<uint> &Indexes2)
{
	Indexes1.clear();
	Indexes2.clear();
	if (Count < 2)
		return;

	// The number of pairs is known up front: Count*(Count-1)/2
	const size_t PairCount = size_t(Count)*size_t(Count - 1)/2;
	Indexes1.reserve(PairCount);
	Indexes2.reserve(PairCount);

	for (uint i = 0; i < Count; ++i)
	{
		for (uint j = i + 1; j < Count; ++j)
		{
			Indexes1.push_back(i);
			Indexes2.push_back(j);
		}
	}
}
// Enumerate every pair (i, j) with 0 <= i < Count1 and 0 <= j < Count2.
// On return Indexes1[k], Indexes2[k] is the k'th pair, ordered by i then
// j. Both vectors are cleared first.
void GetAllPairs(uint Count1, uint Count2,
  vector<uint> &Indexes1, vector<uint> &Indexes2)
{
	Indexes1.clear();
	Indexes2.clear();

	// The number of pairs is known up front: Count1*Count2
	const size_t PairCount = size_t(Count1)*size_t(Count2);
	Indexes1.reserve(PairCount);
	Indexes2.reserve(PairCount);

	for (uint i = 0; i < Count1; ++i)
	{
		for (uint j = 0; j < Count2; ++j)
		{
			Indexes1.push_back(i);
			Indexes2.push_back(j);
		}
	}
}
// Choose pairs (i, j) with 0 <= i < Count1 and 0 <= j < Count2.
//
// If TargetPairCount is UINT_MAX, or the total number of pairs is below
// 1.5x TargetPairCount, all pairs are returned via GetAllPairs().
// Otherwise distinct pairs are drawn at random with randu32() until
// TargetPairCount pairs are collected or 10*TargetPairCount draws have
// been made; more than TargetPairCount/2 pairs must result (asserted).
// Sampled pairs are returned in sorted (i, j) order.
void GetPairs(uint Count1, uint Count2, uint TargetPairCount,
  vector<uint> &Indexes1, vector<uint> &Indexes2)
{
	Indexes1.clear();
	Indexes2.clear();

	// Size arithmetic is done in 64 bits so that large counts cannot wrap
	// around and pick the wrong branch or truncate the draw limit.
	const unsigned long long AllPairCount =
	  (unsigned long long) Count1*Count2;
	const unsigned long long AllPairsThreshold =
	  (unsigned long long) TargetPairCount*3/2;
	if (TargetPairCount == UINT_MAX || AllPairCount < AllPairsThreshold)
	{
		GetAllPairs(Count1, Count2, Indexes1, Indexes2);
		return;
	}

	set<pair<uint, uint> > PairSet;
	const unsigned long long MaxCounter =
	  (unsigned long long) TargetPairCount*10;
	unsigned long long Counter = 0;
	while (Counter++ < MaxCounter && (uint) SIZE(PairSet) < TargetPairCount)
	{
		uint i = randu32()%Count1;
		uint j = randu32()%Count2;
		// NOTE(review): equal indexes are skipped here but are included by
		// the all-pairs branch above; if i and j index two different sets
		// this skip looks unintended -- confirm against callers.
		if (i == j)
			continue;
		pair<uint, uint> Pair(i, j);
		PairSet.insert(Pair);
	}

	uint PairCount = SIZE(PairSet);
	asserta(PairCount > TargetPairCount/2);
	for (set<pair<uint, uint> >::const_iterator p = PairSet.begin();
	  p != PairSet.end(); ++p)
	{
		uint Index1 = p->first;
		uint Index2 = p->second;
		Indexes1.push_back(Index1);
		Indexes2.push_back(Index2);
	}
}
muscle-5.1.0/src/getpostpairsalignedflat.cpp 0000664 0000000 0000000 00000004561 14244530626 0021233 0 ustar 00root root 0000000 0000000 #include "muscle.h"
#include "locallock.h"
float GetPostPairsAlignedFlat(const string &aProgressStr,
const MultiSequence &MSA1, const MultiSequence &MSA2,
const vector &SeqIndexes1, const vector &SeqIndexes2,
vector &SparsePosts)
{
string ProgressStr = aProgressStr;
if (SIZE(ProgressStr) > 20)
ProgressStr = ProgressStr.substr(0, 20);
const uint SeqCount1 = MSA1.GetSeqCount();
const uint SeqCount2 = MSA2.GetSeqCount();
const uint PairCount = SIZE(SeqIndexes1);
asserta(SIZE(SeqIndexes2) == PairCount);
asserta(SparsePosts.empty());
// Allocate here to avoid race condition with push_back() in loop
SparsePosts.resize(PairCount);
int PairCounter = 0;
uint ThreadCount = GetRequestedThreadCount();
float SumEA = 0;
#pragma omp parallel for num_threads(ThreadCount)
for (int PairIndex = 0; PairIndex < (int) PairCount; ++PairIndex)
{
uint Min = min(SeqCount1, SeqCount2);
uint Max = max(SeqCount1, SeqCount2);
Lock();
ProgressStep(PairCounter++, PairCount,
"%s [%u x %u, %u pairs]",
ProgressStr.c_str(), Min, Max, PairCount);
Unlock();
uint SeqIndex1 = SeqIndexes1[PairIndex];
uint SeqIndex2 = SeqIndexes2[PairIndex];
asserta(SeqIndex1 < SeqCount1);
asserta(SeqIndex2 < SeqCount2);
const Sequence *gapped_seq1 = MSA1.GetSequence(SeqIndex1);
const Sequence *gapped_seq2 = MSA2.GetSequence(SeqIndex2);
Sequence *seq1 = gapped_seq1->DeleteGaps();
Sequence *seq2 = gapped_seq2->DeleteGaps();
const byte *ByteSeq1 = seq1->GetBytePtr();
const byte *ByteSeq2 = seq2->GetBytePtr();
const uint L1 = seq1->GetLength();
const uint L2 = seq2->GetLength();
float *Fwd = AllocFB(L1, L2);
float *Bwd = AllocFB(L1, L2);
float *Post = AllocPost(L1, L2);
CalcFwdFlat(ByteSeq1, L1, ByteSeq2, L2, Fwd);
CalcBwdFlat(ByteSeq1, L1, ByteSeq2, L2, Bwd);
DeleteSequence(seq1);
DeleteSequence(seq2);
CalcPostFlat(Fwd, Bwd, L1, L2, Post);
delete Fwd;
delete Bwd;
float *DPRows = AllocDPRows(L1, L2);
char *TB = AllocTB(L1, L2);
string Path;
float Score = CalcAlnFlat(Post, L1, L2, DPRows, TB, Path);
delete DPRows;
delete TB;
MySparseMx *SparsePost = new MySparseMx;
asserta(SparsePost);
SparsePost->FromPost(Post, L1, L2);
SparsePosts[PairIndex] = SparsePost;
delete Post;
float EA = Score/min(L1, L2);
Lock();
SumEA += EA;
Unlock();
}
float AvgEA = SumEA/PairCount;
return AvgEA;
}
muscle-5.1.0/src/gitver.bash 0000664 0000000 0000000 00000000540 14244530626 0015740 0 ustar 00root root 0000000 0000000 #!/bin/bash
# Write the git description of the working tree, wrapped in double quotes,
# to gitver.txt and print it.

# No repository one level up: keep an existing gitver.txt, otherwise write
# a placeholder. The test is -e rather than -d because in a git worktree or
# submodule checkout ../.git is a regular file, not a directory.
if [ ! -e ../.git ] ; then
	if [ ! -f gitver.txt ] ; then
		echo "0" > gitver.txt
	fi
	echo "Repo not found, git hash set to zero"
	exit 0
fi

# Make sure git can be found even with a minimal PATH
PATH=$PATH:/usr/bin

git describe --abbrev=6 --dirty --long --always \
  > gitver.tmp

# Strip any double quotes from the description, then re-emit it wrapped in
# exactly one pair of double quotes
sed -i '-es/"//g' gitver.tmp
echo \"`cat gitver.tmp`\" > gitver.txt
rm -f gitver.tmp
cat gitver.txt
muscle-5.1.0/src/gitver.bat 0000664 0000000 0000000 00000000364 14244530626 0015575 0 ustar 00root root 0000000 0000000 @echo off
rem Make sure gitver.txt exists before anything else runs.
if exist gitver.txt (
	echo gitver.txt found
) else (
	echo "-" > gitver.txt
)

rem Run gitver.bash under Cygwin bash when it is installed; otherwise write
rem a placeholder version. "exit /b" ends only this script, whereas a bare
rem "exit" would also terminate the cmd.exe session that invoked it.
if exist c:\cygwin64\bin\bash.exe (
	echo bash found
	c:\cygwin64\bin\bash -c ./gitver.bash
) else (
	echo bash not found
	echo 000 > gitver.txt
	exit /b
)
muscle-5.1.0/src/globalinputms.cpp 0000664 0000000 0000000 00000003564 14244530626 0017176 0 ustar 00root root 0000000 0000000 #include "muscle.h"
// The global input sequence set; allocated by LoadGlobalInputMS() and
// deleted by ClearGlobalInputMS(). Zero while nothing is loaded.
static MultiSequence *g_GlobalMS;
// Statistics of the loaded set, computed by LoadGlobalInputMS():
// number of sequences, mean sequence length and maximum sequence length.
static uint g_GlobalMSSeqCount = 0;
static double g_GlobalMSMeanSeqLength = 0;
static uint g_GlobalMSMaxSeqLength = 0;
void ClearGlobalInputMS()
{
if (g_GlobalMS == 0)
return;
delete g_GlobalMS;
g_GlobalMS = 0;
}
// Load the global input sequence set from the FASTA file FileName and
// return a reference to it. Must not be called while a set is already
// loaded (asserted). Also records the sequence count, mean and maximum
// sequence length, and stores each sequence's index in its m_GSI member.
MultiSequence &LoadGlobalInputMS(const string &FileName)
{
	asserta(g_GlobalMS == 0);
	g_GlobalMS = new MultiSequence;
	asserta(g_GlobalMS != 0);
	g_GlobalMS->FromFASTA(FileName, true);

	g_GlobalMSSeqCount = g_GlobalMS->GetSeqCount();
	g_GlobalMSMeanSeqLength = 0;
	g_GlobalMSMaxSeqLength = 0;

	double TotalLength = 0;
	for (uint GSI = 0; GSI < g_GlobalMSSeqCount; ++GSI)
	{
		const Sequence *Seq = g_GlobalMS->GetSequence(GSI);
		const uint L = Seq->GetLength();
		if (L > g_GlobalMSMaxSeqLength)
			g_GlobalMSMaxSeqLength = L;
		TotalLength += L;

		// The sequence is only reachable as const here; constness is
		// cast away to tag it with its global index.
		const_cast<Sequence *>(Seq)->m_GSI = GSI;
	}

	if (g_GlobalMSSeqCount > 0)
		g_GlobalMSMeanSeqLength = TotalLength/g_GlobalMSSeqCount;

	return *g_GlobalMS;
}
// Return the loaded global input sequence set; asserts that one is loaded.
MultiSequence &GetGlobalInputMS()
{
	asserta(g_GlobalMS != 0);
	MultiSequence &MS = *g_GlobalMS;
	return MS;
}
// Number of sequences recorded by the most recent LoadGlobalInputMS().
uint GetGlobalMSSeqCount()
{
	const uint SeqCount = g_GlobalMSSeqCount;
	return SeqCount;
}
// Mean sequence length recorded by the most recent LoadGlobalInputMS().
double GetGlobalMSMeanSeqLength()
{
	const double MeanLength = g_GlobalMSMeanSeqLength;
	return MeanLength;
}
// Number of global sequence indexes; same value as GetGlobalMSSeqCount().
uint GetGSICount()
{
	const uint Count = GetGlobalMSSeqCount();
	return Count;
}
// Return the sequence with global index GSI from the loaded input set.
// Asserts that GSI is in range, that a set is loaded and that the
// sequence pointer is non-null.
const Sequence &GetGlobalInputSeq(uint GSI)
{
	asserta(GSI < g_GlobalMSSeqCount);
	asserta(g_GlobalMS != 0);

	const Sequence *SeqPtr = g_GlobalMS->GetSequence(GSI);
	asserta(SeqPtr != 0);

	const Sequence &Seq = *SeqPtr;
	return Seq;
}
// Return the label of the input sequence with global index GSI.
const string &GetGlobalInputSeqLabel(uint GSI)
{
	return GetGlobalInputSeq(GSI).GetLabel();
}
void ShowGlobalInputSeqStats()
{
ProgressLog("Input: %u seqs, length avg %.0f max %u\n\n",
g_GlobalMSSeqCount, g_GlobalMSMeanSeqLength, g_GlobalMSMaxSeqLength);
if (g_GlobalMSMaxSeqLength > 5000)
Warning("Sequence length >5k may require excessive memory");
}
muscle-5.1.0/src/gobuff.h 0000664 0000000 0000000 00000001535 14244530626 0015227 0 ustar 00root root 0000000 0000000 #ifndef gobuff_h
#define gobuff_h
#include "myutils.h"
template class GoBuff
{
public:
unsigned MaxSize;
unsigned Size;
T *Data;
public:
GoBuff()
{
MaxSize = 0;
Size = 0;
Data = 0;
}
~GoBuff() { Free(); }
void Free()
{
myfree(Data);
Size = 0;
Data = 0;
}
void Alloc(unsigned n)
{
if (n <= MaxSize)
return;
unsigned NewMaxSize = n + SizeInc;
T *NewBuffer = myalloc(T, NewMaxSize);
if (Size > 0)
{
if (CopyOnGrow)
memcpy(NewBuffer, Data, Size*sizeof(T));
myfree(Data);
}
if (ZeroOnGrow)
memset(NewBuffer, 0, NewMaxSize*sizeof(T));
Data = NewBuffer;
MaxSize = NewMaxSize;
}
unsigned GetMemUseBytes() const
{
return (MaxSize*sizeof(T));
}
};
const unsigned GROW64K = 0x10000;
#endif // gobuff_h
muscle-5.1.0/src/guidetreejoinorder.cpp 0000664 0000000 0000000 00000014346 14244530626 0020207 0 ustar 00root root 0000000 0000000 #include "myutils.h"
#include "muscle.h"
#include "textfile.h"
#include "tree.h"
#include "pprog.h"
#include