pax_global_header 0000666 0000000 0000000 00000000064 12635670016 0014520 g ustar 00root root 0000000 0000000 52 comment=5779b83807f368192275a7d5ae37facfd5f85737
axe-0.3.1/ 0000775 0000000 0000000 00000000000 12635670016 0012276 5 ustar 00root root 0000000 0000000 axe-0.3.1/.gitattributes 0000664 0000000 0000000 00000000040 12635670016 0015163 0 ustar 00root root 0000000 0000000 docs/gitversion.py export-subst
axe-0.3.1/.gitignore 0000664 0000000 0000000 00000000125 12635670016 0014264 0 ustar 00root root 0000000 0000000 # Editor temp files
*.swp
*.bak
*~
# Build products and generated files
build
*.pyc
*.o
*.a
*.so
tags
version
axe-0.3.1/.gitmodules 0000664 0000000 0000000 00000000134 12635670016 0014451 0 ustar 00root root 0000000 0000000 [submodule "src/libqes"]
path = src/libqes
url = https://github.com/kdmurray91/libqes.git
axe-0.3.1/.travis.yml 0000664 0000000 0000000 00000001365 12635670016 0014414 0 ustar 00root root 0000000 0000000 language: c
# Build configurations: both a Release and a Debug build type are exercised.
env:
matrix:
- BUILD_TYPE=Release
- BUILD_TYPE=Debug
# C compilers to build with.
compiler:
- clang
- gcc
sudo: false
notifications:
email:
- spam@kdmurray.id.au
# Extra apt source and packages installed on the build machine.
addons:
apt:
sources:
- kalakris-cmake
packages:
- cmake
- lcov
- libgsl0-dev
- python
- python-sphinx
install:
# Build zlib 1.2.8 from source and install it under $HOME; the cmake call in
# the script section points ZLIB_ROOT at that prefix.
- pushd $HOME
- wget http://zlib.net/zlib-1.2.8.tar.xz
- tar xvf zlib-1.2.8.tar.xz
- cd zlib-1.2.8
- ./configure --prefix=$HOME
- make
- make install
- popd
# Fetch the submodules, then create an out-of-source build directory and an
# install prefix (target/) next to it.
- git submodule update --init
- mkdir build
- mkdir target
- cd build
script:
# Configure for the selected build type, installing into ../target.
- cmake .. -DCMAKE_INSTALL_PREFIX=../target -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DZLIB_ROOT=$HOME
- make
# Run the Python CLI test script against the build directory, then the
# test_axe binary.
- python ../tests/axe_cli_tests.py .
- ./bin/test_axe
# Check that installation actually produced the axe-demux executable.
- make install
- test -f ../target/bin/axe-demux
axe-0.3.1/CMakeLists.txt 0000664 0000000 0000000 00000005551 12635670016 0015044 0 ustar 00root root 0000000 0000000 CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
# Declare the project; C is the only language enabled.
PROJECT(axe C)

# Let CMake also search the project's cmake-modules/ directory for modules.
LIST(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake-modules")

# Turn on test support for this directory and everything below it.
ENABLE_TESTING()

# Default to a Release build when no build type was requested.
IF (NOT CMAKE_BUILD_TYPE)
    SET(CMAKE_BUILD_TYPE Release)
ENDIF()

# Collect build products inside the build tree: runtime outputs go to bin/,
# library and archive outputs go to lib/.
SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
SET(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
SET(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
# Determine AXE_VERSION.
#
# A `version` file in the source directory takes precedence and is read as-is.
# Otherwise `git describe` is consulted.  When neither source yields a version
# (git is not installed, the sources are not a git checkout, no tag is
# reachable, or the version file is empty) a warning is printed and the
# placeholder "unknown" is used, so the version is never silently empty.
IF (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/version")
    FILE(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/version" AXE_VERSION)
ELSE()
    # git describe as versioning
    EXECUTE_PROCESS(COMMAND git describe
                    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
                    RESULT_VARIABLE AXE_GIT_DESCRIBE_RESULT
                    OUTPUT_VARIABLE AXE_VERSION
                    ERROR_QUIET
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
    # A non-zero exit status (or a failure to launch git at all) means that
    # whatever was captured is not a usable version string.
    IF (NOT "${AXE_GIT_DESCRIBE_RESULT}" STREQUAL "0")
        SET(AXE_VERSION "")
    ENDIF()
ENDIF()
IF ("${AXE_VERSION}" STREQUAL "")
    MESSAGE(WARNING "Could not determine the axe version from a 'version' "
                    "file or from 'git describe'; using 'unknown'")
    SET(AXE_VERSION "unknown")
ENDIF()
MESSAGE(STATUS "${CMAKE_BUILD_TYPE} build of axe version: ${AXE_VERSION}")
# ---------------------------------------------------------------------------
# External packages and headers
# ---------------------------------------------------------------------------

# zlib is mandatory: configuration fails unless a zlib satisfying the
# requested version (1.2.5) is found.
FIND_PACKAGE(ZLIB 1.2.5 REQUIRED)

# GSL is optional: its libraries and include directories are recorded in the
# AXE_DEP_* variables only when the package was found.
FIND_PACKAGE(GSL)
IF (GSL_FOUND)
    SET(AXE_DEP_LIBS ${GSL_LIBRARIES})
    SET(AXE_DEP_INCLUDES ${GSL_INCLUDE_DIRS})
ENDIF()
##########################
## Set Compiler Options ##
##########################

# GCC-only settings: a few warnings that are specific to GCC and, from
# GCC 4.9 onwards, coloured diagnostics.
IF (CMAKE_COMPILER_IS_GNUCC)
    SET(AXEWRN "${AXEWRN} -Woverride-init -Wnormalized=id -Wlogical-op")
    # Ask the compiler for its version.  The output ends with a newline, which
    # is stripped so that GCC_VERSION holds only the version number.
    EXECUTE_PROCESS(COMMAND ${CMAKE_C_COMPILER} -dumpversion
                    OUTPUT_VARIABLE GCC_VERSION
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
    # "not older than 4.9" is the same test as "newer than or equal to 4.9".
    IF (NOT GCC_VERSION VERSION_LESS 4.9)
        SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-color=always")
    ENDIF()
ENDIF()
# Set CFLAGS
#
# AXEWRN accumulates the hardening and warning options that apply to every
# build type; each option is listed exactly once.
SET(AXEWRN "${AXEWRN} -fstack-protector-all -Wstack-protector -Wfloat-equal")
SET(AXEWRN "${AXEWRN} -Wundef -Wpointer-arith -Wstrict-prototypes")
SET(AXEWRN "${AXEWRN} -Wmissing-prototypes -Wwrite-strings -Wredundant-decls")
SET(AXEWRN "${AXEWRN} -Wchar-subscripts -Wcomment -Wformat=2")
SET(AXEWRN "${AXEWRN} -Wmissing-declarations -Wnested-externs")
SET(AXEWRN "${AXEWRN} -Wbad-function-cast -Wswitch-enum -Winit-self")
SET(AXEWRN "${AXEWRN} -Wmissing-field-initializers -Wdeclaration-after-statement")
SET(AXEWRN "${AXEWRN} -Wold-style-definition -Waddress -Wmissing-noreturn")
SET(AXEWRN "${AXEWRN} -Wstrict-overflow=1 -Wextra -Warray-bounds -Wall")

# The sources are compiled as C99 with GNU extensions.
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu99")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AXEWRN}")

# _FORTIFY_SOURCE only takes effect when the compiler optimises, and glibc
# emits a warning for every translation unit when it is defined in an
# unoptimised build.  It is therefore added to the optimised configurations
# only, instead of to the flags shared with Debug builds.
SET(CMAKE_C_FLAGS_RELEASE
    "${CMAKE_C_FLAGS_RELEASE} -D_FORTIFY_SOURCE=2")
SET(CMAKE_C_FLAGS_RELWITHDEBINFO
    "${CMAKE_C_FLAGS_RELWITHDEBINFO} -D_FORTIFY_SOURCE=2")
SET(CMAKE_C_FLAGS_MINSIZEREL
    "${CMAKE_C_FLAGS_MINSIZEREL} -D_FORTIFY_SOURCE=2")
# Header search path, in order: axe's own sources, the in-tree datrie, libqes
# and gsl directories, and finally the build directory, which receives the
# generated axe_config.h.
INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/src"
                    "${CMAKE_SOURCE_DIR}/src/datrie"
                    "${CMAKE_SOURCE_DIR}/src/libqes/src"
                    "${CMAKE_SOURCE_DIR}/src/gsl"
                    "${CMAKE_BINARY_DIR}")

# Let the linker search the lib/ directory of the build tree.
LINK_DIRECTORIES("${CMAKE_BINARY_DIR}/lib")

# Generate axe_config.h in the build directory from its template.
CONFIGURE_FILE("${CMAKE_SOURCE_DIR}/src/axe_config.h.in"
               "${CMAKE_BINARY_DIR}/axe_config.h")

# Sub-projects.  LIBQES_AS_SUBMODULE is set before libqes is added.
ADD_SUBDIRECTORY(docs)
ADD_SUBDIRECTORY(tests)
ADD_SUBDIRECTORY(src)
SET(LIBQES_AS_SUBMODULE True) # stop libqes installing itself
ADD_SUBDIRECTORY(src/libqes)
axe-0.3.1/LICENSE.txt 0000664 0000000 0000000 00000077246 12635670016 0014141 0 ustar 00root root 0000000 0000000 GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc.
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
axe-0.3.1/README.md 0000664 0000000 0000000 00000012345 12635670016 0013562 0 ustar 00root root 0000000 0000000 AXE
===
> De-multiplex NGS reads using trie data structures. It's fast, and made of tries!
[DOI: zenodo.org/record/12278](https://zenodo.org/record/12278)
[Documentation status](https://readthedocs.org/projects/axe-demultiplexer/?badge=latest)
AXE very rapidly selects the optimal barcode present in a sequence read, even
in the presence of sequencing errors. The algorithm is able to handle
combinatorial barcoding, barcodes of differing length, and several mismatches
per barcode. Early results indicate far improved accuracy and speed over
existing de-multiplexers. Unscientific trials show AXE processes more than
500,000 reads per second.
**Warning**: Axe has not yet been comprehensively tested. However, in the
spirit of "release early and often", here it is.
### Tests:
| Jenkins GNU/Linux | [Build status](http://biojenkins.anu.edu.au/job/axe/) |
| ----------------- | --- |
| TravisCI | [Build status](https://travis-ci.org/kdmurray91/axe) |
| Test Coverage | [Coverage status](https://coveralls.io/r/kdmurray91/axe?branch=master) |
| Coverity Scans | [Coverity Scan status](https://scan.coverity.com/projects/2666) |
Important Note
--------------
**For arcane reasons, the name of the ``axe`` binary changed to ``axe-demux``
with version 0.3.0. Apologies for the inconvenience, this was required to
make ``axe`` installable in Debian and its derivatives. Command-line usage
did not change.**
Installation:
-------------
Currently, only recent GNU/Linux systems are officially supported. All code and
the build system are portable, so compilation and use on other systems should
be possible; I just don't have machines available to test. Please report any
installation issues on any system as GitHub bugs and I'll do my best to sort
them out.
In short, on *NIX, get the dependencies (see below), and:
git clone --recursive https://github.com/kdmurray91/axe.git axe
cd axe
mkdir -p build && cd build
cmake ..
make
sudo make install
To install to a prefix, as you would with `./configure --prefix` with the
autotools build system, please use the following cmake command in place of the
one above:
cmake -DCMAKE_INSTALL_PREFIX=/path/to/your/prefix ..
e.g.:
cmake -DCMAKE_INSTALL_PREFIX=$HOME ..
For me, using `~/` as the prefix will install `axe` under `/home/kevin/bin` on
GNU/Linux, and (if I had one) `/Users/kevin/bin` on Mac OS X. It's also wise to
use `make install` not `sudo make install` when installing to a home directory.
### Dependencies:
- cmake. This is installable via `sudo apt-get install cmake` on Debian based
systems, or `brew install cmake` using homebrew on OS X.
- zlib version >= 1.2.5. On Debian, use the package `zlib1g-dev`.
- libqes, tinytest, libgsl and libdatrie (bundled in source, if you used
`git clone --recursive` or an installation tarball. Otherwise, run
`git submodule update --init`).
You'll possibly need to install zlib to your chosen prefix (e.g. `~/`) on
supercomputers, which often have very old versions of zlib. To do so:
wget http://zlib.net/zlib-1.2.8.tar.gz
tar xvf zlib-1.2.8.tar.gz
cd zlib-1.2.8
    ./configure --prefix=<your_prefix> # e.g. --prefix=$HOME
make && make install
And then, use the following cmake command, assuming your prefix is `~/`:
cmake -DCMAKE_INSTALL_PREFIX=$HOME -DZLIB_ROOT=$HOME ..
Usage:
------
Full documentation, including a basic description of the algorithm, is hosted
at https://axe-demultiplexer.readthedocs.org/en/latest/ .
Implementation Progress:
------------------------
- [x] Single ended read de-multiplexing
- [x] Interleaved/Paired input and output with single-ended de-multiplexing
- [x] Combinatorial de-multiplexing
- [ ] CLI integration tests
- [ ] Comprehensive `libaxe` tests
- [ ] Comprehensive CLI tests
See also TODO.md
Publication
-----------
A publication is coming soon, if the reviewer gods decide to smile upon us.
Versioning
----------
We use Semantic Versioning. See [semver.org](http://semver.org)
LICENSE
-------
The source of axe itself, namely `src/axe*.[ch]` and `tests/*.[ch]`, is
Copyright 2014-2015 Kevin Murray. All axe source code is licensed under the GNU
GPL version 3 or greater, a copy of which is included with this source as
`LICENCE.txt`
The source of `tinytest`, located in `tests/tinytest`, is Copyright 2009-2012
Nick Mathewson; `tinytest` is distributed under the 3-clause BSD license.
`tinytest` is hosted at [Nick's github page](https://github.com/nmathewson/tinytest).
The source of `libgsl`, located in `src/gsl`, is Copyright (C) 1996, 1997,
1998, 1999, 2000, 2007 Gerard Jungman and Brian Gough. It is licensed under the
GNU General Public License, version 3 or greater.
The source of `libdatrie`, located in `src/datrie`, is Copyright 2006 Theppitak
Karoonboonyanan, and is licensed under the GNU LGPL version 2.1 per
`src/datrie/COPYING`. `libdatrie` is hosted at Theppitak Karoonboonyanan's
website, [here](http://linux.thai.net/~thep/datrie/datrie.html).
axe-0.3.1/TODO.md 0000664 0000000 0000000 00000000444 12635670016 0013367 0 ustar 00root root 0000000 0000000 TODO list:
==========
- Re-read documentation; fix errors
- Exhaustive CLI tests
- Refactor trie lookups to hide all datrie operations from main body of code
- Summary info
- Valgrind integration tests - maybe a CLI flag to enable
- Investigate shipping the gsl functions bundled in source.
axe-0.3.1/cmake-modules/ 0000775 0000000 0000000 00000000000 12635670016 0015024 5 ustar 00root root 0000000 0000000 axe-0.3.1/cmake-modules/FindGSL.cmake 0000664 0000000 0000000 00000007702 12635670016 0017262 0 ustar 00root root 0000000 0000000 # Try to find gnu scientific library GSL
# See
# http://www.gnu.org/software/gsl/ and
# http://gnuwin32.sourceforge.net/packages/gsl.htm
#
# Based on a script of Felix Woelk and Jan Woetzel
# (www.mip.informatik.uni-kiel.de)
#
# It defines the following variables:
# GSL_FOUND - system has GSL lib
# GSL_INCLUDE_DIRS - where to find headers
# GSL_LIBRARIES - full path to the libraries
# GSL_LIBRARY_DIRS - directories where the GSL libraries are found
# GSL_DEFINITIONS - preprocessor definitions (-D...) from `gsl-config --cflags` (Unix only)
# GSL_CFLAGS - remaining compiler flags from `gsl-config --cflags` (Unix only)
# Note: CMAKE_GSL_CXX_FLAGS, GSL_LINK_DIRECTORIES and GSL_EXE_LINKER_FLAGS, which
# the script this module is based on documented, are not set by this module.
# Start from "not found"; the platform-specific probing below switches these on.
set( GSL_FOUND OFF )
set( GSL_CBLAS_FOUND OFF )

# Windows, but not for Cygwin and MSys where gsl-config is available
if( WIN32 AND NOT CYGWIN AND NOT MSYS )
  # look for headers
  find_path( GSL_INCLUDE_DIR
    NAMES gsl/gsl_cdf.h gsl/gsl_randist.h
  )
  if( GSL_INCLUDE_DIR )
    # Only libraries that were actually located are added to GSL_LIBRARIES, so
    # a "<var>-NOTFOUND" placeholder can never end up on a link line.
    set( GSL_LIBRARIES )

    # look for gsl library
    find_library( GSL_LIBRARY
      NAMES gsl
    )
    if( GSL_LIBRARY )
      set( GSL_INCLUDE_DIRS ${GSL_INCLUDE_DIR} )
      get_filename_component( GSL_LIBRARY_DIRS "${GSL_LIBRARY}" PATH )
      list( APPEND GSL_LIBRARIES ${GSL_LIBRARY} )
      set( GSL_FOUND ON )
    endif()

    # look for gsl cblas library
    find_library( GSL_CBLAS_LIBRARY
      NAMES gslcblas
    )
    if( GSL_CBLAS_LIBRARY )
      list( APPEND GSL_LIBRARIES ${GSL_CBLAS_LIBRARY} )
      set( GSL_CBLAS_FOUND ON )
    endif()
  endif()

  mark_as_advanced(
    GSL_INCLUDE_DIR
    GSL_LIBRARY
    GSL_CBLAS_LIBRARY
  )
else()
  if( UNIX OR MSYS )
    find_program( GSL_CONFIG_EXECUTABLE gsl-config
      /usr/bin/
      /usr/local/bin
    )

    if( GSL_CONFIG_EXECUTABLE )
      # Assume success; either gsl-config invocation below may revoke it.
      set( GSL_FOUND ON )

      # run the gsl-config program to get the C compiler flags
      execute_process(
        COMMAND sh "${GSL_CONFIG_EXECUTABLE}" --cflags
        OUTPUT_VARIABLE GSL_CFLAGS
        RESULT_VARIABLE RET
        ERROR_QUIET
      )
      if( RET EQUAL 0 )
        string( STRIP "${GSL_CFLAGS}" GSL_CFLAGS )
        separate_arguments( GSL_CFLAGS )

        # Sort every flag into exactly one bucket:
        #   -D*  -> GSL_DEFINITIONS  (kept verbatim)
        #   -I*  -> GSL_INCLUDE_DIRS (with the -I prefix dropped)
        #   rest -> GSL_CFLAGS
        # Classifying flag by flag also handles the last (or only) flag, which
        # a "<flag>;" pattern would leave behind in GSL_CFLAGS.
        set( GSL_DEFINITIONS )
        set( GSL_INCLUDE_DIRS )
        set( _gsl_other_cflags )
        foreach( _gsl_flag ${GSL_CFLAGS} )
          if( _gsl_flag MATCHES "^-D" )
            list( APPEND GSL_DEFINITIONS "${_gsl_flag}" )
          elseif( _gsl_flag MATCHES "^-I" )
            string( REGEX REPLACE "^-I" "" _gsl_include_dir "${_gsl_flag}" )
            list( APPEND GSL_INCLUDE_DIRS "${_gsl_include_dir}" )
          else()
            list( APPEND _gsl_other_cflags "${_gsl_flag}" )
          endif()
        endforeach()
        set( GSL_CFLAGS ${_gsl_other_cflags} )
        # Drop the temporaries so they do not linger in the includer's scope.
        set( _gsl_other_cflags )
        set( _gsl_include_dir )
      else()
        set( GSL_FOUND FALSE )
      endif()

      # run the gsl-config program to get the libs
      execute_process(
        COMMAND sh "${GSL_CONFIG_EXECUTABLE}" --libs
        OUTPUT_VARIABLE GSL_LIBRARIES
        RESULT_VARIABLE RET
        ERROR_QUIET
      )
      if( RET EQUAL 0 )
        string( STRIP "${GSL_LIBRARIES}" GSL_LIBRARIES )
        separate_arguments( GSL_LIBRARIES )

        # extract linkdirs (-L) for rpath (i.e., LINK_DIRECTORIES)
        string( REGEX MATCHALL "-L[^;]+"
          GSL_LIBRARY_DIRS "${GSL_LIBRARIES}" )
        string( REPLACE "-L" ""
          GSL_LIBRARY_DIRS "${GSL_LIBRARY_DIRS}" )
      else()
        set( GSL_FOUND FALSE )
      endif()

      mark_as_advanced(
        GSL_CFLAGS
      )
    else()
      # Stay silent when the caller passed QUIET to find_package().
      if( NOT GSL_FIND_QUIETLY )
        message( STATUS "FindGSL: gsl-config not found.")
      endif()
    endif()
  endif()
endif()
# Report the outcome: a fatal error when GSL was REQUIRED but is missing, a
# status line when it was found and the caller did not ask for QUIET.
if( NOT GSL_FOUND )
  if( GSL_FIND_REQUIRED )
    message( FATAL_ERROR "FindGSL: Could not find GSL headers or library" )
  endif()
elseif( NOT GSL_FIND_QUIETLY )
  message( STATUS "FindGSL: Found both GSL headers and library" )
endif()
axe-0.3.1/cmake-modules/FindLIBQES.cmake 0000664 0000000 0000000 00000006761 12635670016 0017620 0 ustar 00root root 0000000 0000000 # - Find libqes
# Find the native libqes includes and library.
# Once done this will define
#
# LIBQES_INCLUDE_DIRS - where to find qes.h, etc.
# LIBQES_LIBRARIES - List of libraries when using libqes.
# LIBQES_FOUND - True if libqes found.
#
# LIBQES_VERSION_STRING - The version of libqes found (x.y.z)
# LIBQES_VERSION_MAJOR - The major version of libqes
# LIBQES_VERSION_MINOR - The minor version of libqes
# LIBQES_VERSION_PATCH - The patch version of libqes
# LIBQES_VERSION_PREREL - The pre-release version of libqes
# LIBQES_VERSION_GIT - The git version of libqes
#
# An includer may set LIBQES_ROOT to a libqes installation root to tell
# this module where to look.
#=============================================================================
# Copyright 2014 Kevin Murray. Adapted from FindZLIB.cmake
#
# Distributed under the OSI-approved BSD License (the "License");
# see accompanying file Copyright.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
# (To distribute this file outside of CMake, substitute the full
# License text for the above reference.)
# _LIBQES_SEARCHES lists the *names* of variables that each hold one set of
# find_path()/find_library() search arguments; they are tried in order.
set(_LIBQES_SEARCHES)

# A caller-supplied LIBQES_ROOT goes first and is searched with
# NO_DEFAULT_PATH, i.e. without consulting the default locations.
if(LIBQES_ROOT)
  set(_LIBQES_SEARCH_ROOT
    PATHS ${LIBQES_ROOT}
    NO_DEFAULT_PATH
  )
  list(APPEND _LIBQES_SEARCHES _LIBQES_SEARCH_ROOT)
endif()

# The regular search: default locations plus %PROGRAMFILES%/libqes.
set(_LIBQES_SEARCH_NORMAL PATHS "$ENV{PROGRAMFILES}/libqes")
list(APPEND _LIBQES_SEARCHES _LIBQES_SEARCH_NORMAL)

# Run both lookups once per search configuration.
foreach(_libqes_search ${_LIBQES_SEARCHES})
  find_path(LIBQES_INCLUDE_DIR
    NAMES qes.h
    ${${_libqes_search}}
    PATH_SUFFIXES include
  )
  find_library(LIBQES_LIBRARY
    NAMES qes
    ${${_libqes_search}}
    PATH_SUFFIXES lib
  )
endforeach()

mark_as_advanced(LIBQES_LIBRARY LIBQES_INCLUDE_DIR)
# Handle version. Again, flogged from zlib.
#
# Parses the `#define LIBQES_VERSION "..."` line of qes_config.h, which is
# expected to look like [v]MAJOR.MINOR.PATCH[PREREL][+git.GITVERSION], and sets
# LIBQES_VERSION_{MAJOR,MINOR,PATCH,PREREL,GIT} and LIBQES_VERSION_STRING.
if(LIBQES_INCLUDE_DIR AND EXISTS "${LIBQES_INCLUDE_DIR}/qes_config.h")
  file(STRINGS "${LIBQES_INCLUDE_DIR}/qes_config.h" LIBQES_H REGEX "^#define LIBQES_VERSION \"[^\"]*\"")

  # Reset the optional components so a value left over in the including scope
  # cannot leak into LIBQES_VERSION_STRING when the header carries none.
  set(LIBQES_VERSION_EXTRA "")
  set(LIBQES_VERSION_PREREL "")
  set(LIBQES_VERSION_GIT "")

  # Without a matching #define there is nothing to parse; leave the version
  # variables alone instead of producing the bogus version "..".
  if(LIBQES_H)
    string(REGEX REPLACE "^.*LIBQES_VERSION \"[Vv]?([0-9]+).*$" "\\1" LIBQES_VERSION_MAJOR "${LIBQES_H}")
    string(REGEX REPLACE "^.*LIBQES_VERSION \"[Vv]?[0-9]+\\.([0-9]+).*$" "\\1" LIBQES_VERSION_MINOR "${LIBQES_H}")
    string(REGEX REPLACE "^.*LIBQES_VERSION \"[Vv]?[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1" LIBQES_VERSION_PATCH "${LIBQES_H}")
    set(LIBQES_VERSION_STRING "${LIBQES_VERSION_MAJOR}.${LIBQES_VERSION_MINOR}.${LIBQES_VERSION_PATCH}")

    # Pre-release tag: the text between the patch number and "+git". It must
    # start with a non-digit, otherwise the regex could backtrack and claim the
    # last digit of a multi-digit patch number (e.g. "0.1.12+git...").
    if("${LIBQES_H}" MATCHES "LIBQES_VERSION \"[Vv]?[0-9]+\\.[0-9]+\\.[0-9]+([^0-9+\"][^+\"]*)\\+git")
      set(LIBQES_VERSION_PREREL "${CMAKE_MATCH_1}")
    endif()

    # Git version: everything after "+git." up to, but excluding, the closing
    # double quote of the #define.
    if("${LIBQES_H}" MATCHES "LIBQES_VERSION \"[Vv]?[0-9]+\\.[0-9]+\\.[0-9]+[^+\"]*\\+git\\.([^\"]+)")
      set(LIBQES_VERSION_GIT "${CMAKE_MATCH_1}")
    endif()
    # Older revisions of this module exported the lower-case spelling; keep it
    # as an alias of the documented LIBQES_VERSION_GIT.
    set(LIBQES_VERSION_git "${LIBQES_VERSION_GIT}")

    # only append the pre-release tag if it exists:
    set(LIBQES_VERSION_STRING "${LIBQES_VERSION_STRING}${LIBQES_VERSION_PREREL}")
  endif()
endif()
# Let the stock helper process QUIET/REQUIRED and the requested version; it
# sets LIBQES_FOUND to TRUE only when every REQUIRED_VARS entry is TRUE.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
  LIBQES
  REQUIRED_VARS
    LIBQES_LIBRARY
    LIBQES_INCLUDE_DIR
  VERSION_VAR
    LIBQES_VERSION_STRING
)

# Publish the conventional plural result variables on success.
if(LIBQES_FOUND)
  set(LIBQES_LIBRARIES ${LIBQES_LIBRARY})
  set(LIBQES_INCLUDE_DIRS ${LIBQES_INCLUDE_DIR})
endif()
axe-0.3.1/docs/ 0000775 0000000 0000000 00000000000 12635670016 0013226 5 ustar 00root root 0000000 0000000 axe-0.3.1/docs/.gitignore 0000664 0000000 0000000 00000000007 12635670016 0015213 0 ustar 00root root 0000000 0000000 .build
axe-0.3.1/docs/CMakeLists.txt 0000664 0000000 0000000 00000002253 12635670016 0015770 0 ustar 00root root 0000000 0000000 FIND_PROGRAM(SPHINXBUILD sphinx-build)
# Documentation targets. They are only defined when sphinx-build was located
# (SPHINXBUILD); otherwise a warning is issued and no docs are built.
# All output is written below ${CMAKE_BINARY_DIR}/doc.
IF(SPHINXBUILD)
    # Options shared by every sphinx-build invocation below.
    SET(ALLSPHINXOPTS -q -D latex_paper_size=a4)

    # Multi-page HTML documentation (not part of the default `doc` target).
    ADD_CUSTOM_TARGET(doc_html
        COMMAND ${SPHINXBUILD} -b html ${ALLSPHINXOPTS} ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_BINARY_DIR}/doc/html
        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
        VERBATIM
    )
    # Single-page HTML documentation.
    ADD_CUSTOM_TARGET(doc_onehtml
        COMMAND ${SPHINXBUILD} -b singlehtml ${ALLSPHINXOPTS} ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_BINARY_DIR}/doc/single
        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
        VERBATIM
    )
    # Manual page; the INSTALL rule below picks up its axe.1 output.
    ADD_CUSTOM_TARGET(doc_man
        COMMAND ${SPHINXBUILD} -b man ${ALLSPHINXOPTS} ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_BINARY_DIR}/doc/man
        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
        VERBATIM
    )
    # Remove everything the targets above generate.
    ADD_CUSTOM_TARGET(doc_clean
        COMMAND ${CMAKE_COMMAND} -E remove_directory "${CMAKE_BINARY_DIR}/doc"
        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
        VERBATIM
    )

    # `doc` is part of the default build. Target-level ordering is expressed
    # with ADD_DEPENDENCIES; the DEPENDS option of ADD_CUSTOM_TARGET is meant
    # for files and custom-command outputs, not for other targets.
    ADD_CUSTOM_TARGET(doc ALL)
    ADD_DEPENDENCIES(doc doc_man doc_onehtml)

    # The page is generated as axe.1 but installed under the binary's name.
    INSTALL(FILES ${CMAKE_BINARY_DIR}/doc/man/axe.1 DESTINATION "share/man/man1" RENAME "axe-demux.1")

    # Have `make clean` remove the generated docs. An absolute path is needed:
    # a relative one is resolved against this directory's binary dir, not
    # against ${CMAKE_BINARY_DIR} where the docs are actually written.
    SET_DIRECTORY_PROPERTIES(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${CMAKE_BINARY_DIR}/doc")
ELSE()
    MESSAGE(WARNING "Cannot build documentation, sphinx isn't installed")
ENDIF()
axe-0.3.1/docs/Makefile 0000664 0000000 0000000 00000015136 12635670016 0014674 0 ustar 00root root 0000000 0000000 # Makefile for Sphinx documentation
#
# You can set these variables from the command line.
# SPHINXOPTS:  extra options appended to every sphinx-build invocation.
# SPHINXBUILD: the sphinx-build executable to run.
# PAPER:       selects one of the PAPEROPT_<PAPER> option sets below (a4 or letter).
# BUILDDIR:    directory under which every builder writes its output.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = .build
# User-friendly check for sphinx-build
# (aborts with an explanatory message when `which $(SPHINXBUILD)` exits with status 1)
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif
# Internal variables.
# $(PAPEROPT_$(PAPER)) expands to one of the two settings below, or to nothing
# when PAPER is empty.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
# Options common to most builders; the trailing "." is the source directory.
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# Every target in this file is a command, not a file to be created. The list
# covers all rules defined below, including latexpdfja, texinfo, info, xml and
# pseudoxml.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info gettext changes linkcheck doctest xml pseudoxml
# help: print the list of available targets with a one-line description each.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo " html to make standalone HTML files"
	@echo " dirhtml to make HTML files named index.html in directories"
	@echo " singlehtml to make a single large HTML file"
	@echo " pickle to make pickle files"
	@echo " json to make JSON files"
	@echo " htmlhelp to make HTML files and a HTML help project"
	@echo " qthelp to make HTML files and a qthelp project"
	@echo " devhelp to make HTML files and a Devhelp project"
	@echo " epub to make an epub"
	@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo " latexpdf to make LaTeX files and run them through pdflatex"
	@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo " text to make text files"
	@echo " man to make manual pages"
	@echo " texinfo to make Texinfo files"
	@echo " info to make Texinfo files and run them through makeinfo"
	@echo " gettext to make PO message catalogs"
	@echo " changes to make an overview of all changed/added/deprecated items"
	@echo " xml to make Docutils-native XML files"
	@echo " pseudoxml to make pseudoxml-XML files for display purposes"
	@echo " linkcheck to check all external links for integrity"
	@echo " doctest to run all doctests embedded in the documentation (if enabled)"
# clean: delete everything below $(BUILDDIR).
clean:
	rm -rf $(BUILDDIR)/*
# html: run the "html" builder into $(BUILDDIR)/html.
html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
# dirhtml: run the "dirhtml" builder into $(BUILDDIR)/dirhtml.
dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
# singlehtml: run the "singlehtml" builder into $(BUILDDIR)/singlehtml.
singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
# pickle: run the "pickle" builder into $(BUILDDIR)/pickle.
pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."
# json: run the "json" builder into $(BUILDDIR)/json.
json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."
# htmlhelp: run the "htmlhelp" builder into $(BUILDDIR)/htmlhelp.
htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	".hhp project file in $(BUILDDIR)/htmlhelp."
# qthelp: run the "qthelp" builder into $(BUILDDIR)/qthelp and print the
# follow-up commands.
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/axe.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/axe.qhc"
# devhelp: run the "devhelp" builder into $(BUILDDIR)/devhelp and print the
# commands for viewing the result.
devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/axe"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/axe"
	@echo "# devhelp"
# epub: run the "epub" builder into $(BUILDDIR)/epub.
epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
# latex: run the "latex" builder into $(BUILDDIR)/latex (sources only).
latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	"(use \`make latexpdf' here to do that automatically)."
# latexpdf: run the "latex" builder, then the all-pdf target of the generated
# Makefile in $(BUILDDIR)/latex.
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
# latexpdfja: as latexpdf, but runs the all-pdf-ja target instead.
latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
# text: run the "text" builder into $(BUILDDIR)/text.
text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."
# man: run the "man" builder into $(BUILDDIR)/man.
man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
# texinfo: run the "texinfo" builder into $(BUILDDIR)/texinfo (sources only).
texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	"(use \`make info' here to do that automatically)."
# info: run the "texinfo" builder, then the info target of the generated
# Makefile in $(BUILDDIR)/texinfo. The sub-make is started through $(MAKE),
# as in latexpdf, so it uses the same make program and inherits its flags.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
# gettext: run the "gettext" builder into $(BUILDDIR)/locale. Uses
# I18NSPHINXOPTS, which omits the -d doctrees option that ALLSPHINXOPTS has.
gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
# changes: run the "changes" builder into $(BUILDDIR)/changes.
changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."
# linkcheck: run the "linkcheck" builder into $(BUILDDIR)/linkcheck.
linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	"or in $(BUILDDIR)/linkcheck/output.txt."
# doctest: run the "doctest" builder into $(BUILDDIR)/doctest.
doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	"results in $(BUILDDIR)/doctest/output.txt."
# xml: run the "xml" builder into $(BUILDDIR)/xml.
xml:
	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
# pseudoxml: run the "pseudoxml" builder into $(BUILDDIR)/pseudoxml.
pseudoxml:
	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
axe-0.3.1/docs/algorithm.rst 0000664 0000000 0000000 00000007711 12635670016 0015754 0 ustar 00root root 0000000 0000000 ************************
Axe's matching algorithm
************************
Axe uses an algorithm based on longest-prefix-in-trie matching to match a
variable length from the start of each read against a set of 'mutated'
barcodes.
Hamming distance matching
-------------------------
While for most applications in high-throughput sequencing hamming distances are
a frowned-upon metric, it is typical for HTS read barcodes to be designed to
tolerate a certain level of hamming mismatches. Given these sequences are short
and typically occur at the 5' end of reads, insertions and deletions rarely
need be considered, and the increased rate of assignment of reads with many
errors is offset by the risk of falsely assigning barcodes to an incorrect
sample. In any case, reads with more than 1-2 sequencing errors in their first
several bases are likely to be poor quality, and will simply be filtered out
during downstream quality control.
Hamming mismatch tries
----------------------
Typically, reads are matched to a set of barcodes by calculating the hamming
distance between the barcode, and the first :math:`l` bases of a read for a
barcode of length :math:`l`. The "correct" barcode is then selected by
recording either the barcode with the lowest hamming distance to the read
(competitive matching) or by simply accepting the first barcode with a hamming
distance below a certain threshold. These approaches are both very
computationally expensive, and can have lower accuracy than the algorithm I
propose. Additionally, implementations of these methods rarely handle barcodes
of differing length and combinatorial barcoding well, if at all.
Central to Axe's algorithm is the concept of hamming-mismatch tries. A trie is
an N-ary tree for an N letter alphabet. In the case of high-throughput
sequencing reads, we have the alphabet ``AGCT``, corresponding to the four
nucleotides of DNA, plus ``N``, used to represent ambiguous base calls. Instead
of matching each barcode to each read, we pre-calculate all allowable sequences
at each mismatch level, and store these in level-wise tries. For example, to
match to a hamming distance of 2, we create three tries: One containing all
barcodes, verbatim, and two tries containing every sequence within a hamming
distance of 1 and 2 of each barcode respectively. Hereafter, these tries are
referred to as the 0, 1 and 2-mm tries, for a hamming distance (mismatch) of
0, 1 and 2. Then, we find the longest prefix in each sequence read in the 0mm
trie. If this prefix is not a valid leaf in the 0mm trie, we find the longest
prefix in the 1mm trie, and so on for all tries in ascending order. If no
prefix of the read is a complete sequence in any trie, the read is assigned to
a "non-barcoded" output file.
This algorithm ensures optimal barcode matching in many ways, but is also
extremely fast. In situations with barcodes of differing length, we ensure that
the *longest* acceptable barcode at a given hamming distance is chosen;
assuming that sequence is random after the barcode, the probability of false
assignments using this method is low. We also ensure that short perfect matches
are preferred to longer inexact matches, as we firstly only consider barcodes
with no error, then 1 error, and so on. This ensures that reads with barcodes
that are followed by random sequence that happens to inexactly match a longer
barcode in the set are not falsely assigned to this longer barcode.
The speed of this algorithm is largely due to the constant time matching
algorithm with respect to the number of barcodes to match. The time taken to
match each read is proportional instead to the length of the barcodes, as for a
barcode of length :math:`l`, at most :math:`l + 1` trie level descents are
required to find an entry in the trie. As this length is more-or-less constant
and small, the overall complexity of axe's algorithm is :math:`O(n)` for
:math:`n` reads, as opposed to :math:`O(nm)` for :math:`n` reads and :math:`m`
barcodes as is typical for traditional matching algorithms.
axe-0.3.1/docs/conf.py 0000664 0000000 0000000 00000024027 12635670016 0014532 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
#
# axe documentation build configuration file, created by
# sphinx-quickstart on Fri Jul 25 09:16:47 2014.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys
import os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, os.path.abspath('.'))
import gitversion
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.todo',
'sphinx.ext.pngmath',
'sphinx.ext.ifconfig',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['.templates']
# The suffix of source filenames.
source_suffix = '.rst'
# The encoding of source files.
#source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = u'axe'
copyright = u'2014, Kevin Murray'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = gitversion.get_versions()['version']
# The full version, including alpha/beta/rc tags.
release = version
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['.build']
# The reST default role (used for this markup: `text`) to use for all
# documents.
#default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False
# -- Options for HTML output ----------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'default'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['.static']
# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
#html_extra_path = []
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}
# If false, no module index is generated.
#html_domain_indices = True
# If false, no index is generated.
#html_use_index = True
# If true, the index is split into individual pages for each letter.
#html_split_index = False
# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None
# Output file base name for HTML help builder.
htmlhelp_basename = 'axedoc'
# -- Options for LaTeX output ---------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
'papersize': 'a4paper',
# The font size ('10pt', '11pt' or '12pt').
'pointsize': '11pt',
# Additional stuff for the LaTeX preamble.
#'preamble': '',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
('index', 'axe.tex', u'axe Documentation',
u'Kevin Murray', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False
# If true, show page references after internal links.
#latex_show_pagerefs = False
# If true, show URL addresses after external links.
#latex_show_urls = False
# Documents to append as an appendix to all manuals.
#latex_appendices = []
# If false, no module index is generated.
#latex_domain_indices = True
# -- Options for manual page output ---------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'axe', u'axe Documentation',
[u'Kevin Murray'], 1)
]
# If true, show URL addresses after external links.
#man_show_urls = False
# -- Options for Texinfo output -------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
('index', 'axe', u'axe Documentation',
u'Kevin Murray', 'axe', 'One line description of project.',
'Miscellaneous'),
]
# Documents to append as an appendix to all manuals.
#texinfo_appendices = []
# If false, no module index is generated.
#texinfo_domain_indices = True
# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'
# If true, do not generate a @detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False
# -- Options for Epub output ----------------------------------------------
# Bibliographic Dublin Core info.
epub_title = u'axe'
epub_author = u'Kevin Murray'
epub_publisher = u'Kevin Murray'
epub_copyright = u'2014, Kevin Murray'
# The basename for the epub file. It defaults to the project name.
#epub_basename = u'axe'
# The HTML theme for the epub output. Since the default themes are not optimized
# for small screen space, using the same theme for HTML and epub output is
# usually not wise. This defaults to 'epub', a theme designed to save visual
# space.
#epub_theme = 'epub'
# The language of the text. It defaults to the language option
# or en if the language is not set.
#epub_language = ''
# The scheme of the identifier. Typical schemes are ISBN or URL.
#epub_scheme = ''
# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#epub_identifier = ''
# A unique identification for the text.
#epub_uid = ''
# A tuple containing the cover image and cover page html template filenames.
#epub_cover = ()
# A sequence of (type, uri, title) tuples for the guide element of content.opf.
#epub_guide = ()
# HTML files that should be inserted before the pages created by sphinx.
# The format is a list of tuples containing the path and title.
#epub_pre_files = []
# HTML files that should be inserted after the pages created by sphinx.
# The format is a list of tuples containing the path and title.
#epub_post_files = []
# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']
# The depth of the table of contents in toc.ncx.
#epub_tocdepth = 3
# Allow duplicate toc entries.
#epub_tocdup = True
# Choose between 'default' and 'includehidden'.
#epub_tocscope = 'default'
# Fix unsupported image types using the PIL.
#epub_fix_images = False
# Scale large images.
#epub_max_image_width = 0
# How to display URL addresses: 'footnote', 'no', or 'inline'.
#epub_show_urls = 'inline'
# If false, no index is generated.
#epub_use_index = True
axe-0.3.1/docs/gitversion.py 0000664 0000000 0000000 00000016512 12635670016 0015776 0 ustar 00root root 0000000 0000000 # This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by GitHub's download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.
# This file is released into the public domain. Generated by
# versioneer-0.13 (https://github.com/warner/python-versioneer)
# these strings will be replaced by git during git-archive
git_refnames = " (HEAD -> master, tag: 0.3.1)"
git_full = "5779b83807f368192275a7d5ae37facfd5f85737"
# these strings are filled in when 'setup.py versioneer' creates _version.py
tag_prefix = ""
parentdir_prefix = ""
versionfile_source = "docs/version.py"
import errno
import os
import re
import subprocess
import sys
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
    """Run the first launchable executable from ``commands`` with ``args``.

    Each candidate in ``commands`` is tried in order; candidates that do not
    exist (ENOENT) are skipped. Any other launch failure aborts immediately.

    :param commands: list of alternative executable names for the same tool.
    :param args: list of arguments appended to the chosen executable.
    :param cwd: working directory for the child process (None = inherit).
    :param verbose: print diagnostics when the command cannot be run.
    :param hide_stderr: capture (and discard) the child's stderr instead of
        letting it through to the parent's stderr.
    :returns: the child's stripped stdout as text, or None if no candidate
        could be launched or the child exited with a non-zero status.
    """
    assert isinstance(commands, list)
    p = None
    for c in commands:
        try:
            # remember shell=False, so use git.cmd on windows, not just git
            p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE,
                                 stderr=(subprocess.PIPE if hide_stderr
                                         else None))
            break
        except EnvironmentError:
            e = sys.exc_info()[1]
            if e.errno == errno.ENOENT:
                # This candidate is not installed; try the next one.
                continue
            if verbose:
                print("unable to run %s" % args[0])
                print(e)
            return None
    else:
        # Loop finished without `break`: none of the candidates exist.
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None
    stdout = p.communicate()[0].strip()
    # Compare the numeric major version; comparing the sys.version *string*
    # is lexicographic and would misclassify e.g. a "10.x" interpreter.
    if sys.version_info[0] >= 3:
        stdout = stdout.decode()
    if p.returncode != 0:
        if verbose:
            print("unable to run %s (error)" % args[0])
        return None
    return stdout
def versions_from_parentdir(parentdir_prefix, root, verbose=False):
    """Derive the version from the name of the directory ``root``.

    Source tarballs conventionally unpack into a directory whose name is the
    project prefix followed by the version string.

    :returns: ``{"version": <dirname minus prefix>, "full": ""}``, or None
        when the directory name does not start with ``parentdir_prefix``.
    """
    base = os.path.basename(root)
    if base.startswith(parentdir_prefix):
        return {"version": base[len(parentdir_prefix):], "full": ""}
    if verbose:
        print("guessing rootdir is '%s', but '%s' doesn't start with "
              "prefix '%s'" % (root, base, parentdir_prefix))
    return None
def git_get_keywords(versionfile_abs):
    """Extract the git-archive keyword strings from a version file.

    The code embedded in _version.py can just fetch the value of these
    keywords. When used from setup.py, we don't want to import _version.py,
    so we do it with a regexp instead. This function is not used from
    _version.py.

    :param versionfile_abs: path of the version file to scan.
    :returns: dict that may contain ``"refnames"`` and/or ``"full"``; empty
        if the file cannot be read or defines neither keyword.
    """
    keywords = {}
    try:
        # `with` guarantees the handle is closed even if reading raises.
        with open(versionfile_abs, "r") as f:
            for line in f:
                stripped = line.strip()
                if stripped.startswith("git_refnames ="):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords["refnames"] = mo.group(1)
                if stripped.startswith("git_full ="):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords["full"] = mo.group(1)
    except EnvironmentError:
        # Unreadable/missing file: best effort, report whatever was found.
        pass
    return keywords
def git_versions_from_keywords(keywords, tag_prefix, verbose=False):
    """Compute version info from expanded git-archive keywords.

    :param keywords: dict with ``"refnames"`` and ``"full"`` entries.
    :param tag_prefix: prefix a tag must carry to count as a version tag;
        it is removed from the reported version.
    :returns: ``{}`` when there are no keywords or they are unexpanded,
        otherwise ``{"version": ..., "full": ...}``. If no tag matches, the
        full revision id doubles as the version.
    """
    if not keywords:
        # keyword-finding function failed to find keywords
        return {}
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        # unexpanded, so not in an unpacked git-archive tarball
        if verbose:
            print("keywords are unexpanded, not using")
        return {}
    refs = set(name.strip() for name in refnames.strip("()").split(","))
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = set(name[len(TAG):] for name in refs if name.startswith(TAG))
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = set(name for name in refs if re.search(r'\d', name))
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs-tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    # sorting will prefer e.g. "2.0" over "2.0rc1"
    candidates = [name for name in sorted(tags)
                  if name.startswith(tag_prefix)]
    if candidates:
        version = candidates[0][len(tag_prefix):]
        if verbose:
            print("picking %s" % version)
    else:
        # no suitable tags, so we use the full revision id
        if verbose:
            print("no suitable tags, using full revision id")
        version = keywords["full"].strip()
    return {"version": version,
            "full": keywords["full"].strip()}
def git_versions_from_vcs(tag_prefix, root, verbose=False):
    """Ask git itself for the version of the checkout rooted at ``root``.

    This runs 'git' from the root of the source tree. This only gets called
    if the git-archive 'subst' keywords were *not* expanded, and
    _version.py hasn't already been rewritten with a short version string,
    meaning we're inside a checked out source tree.

    :returns: ``{"version": <describe output minus prefix>, "full": <HEAD
        id, with "-dirty" appended for a dirty tree>}``, or ``{}`` when
        there is no ``.git`` in ``root``, git fails, or the described tag
        lacks ``tag_prefix``.
    """
    if not os.path.exists(os.path.join(root, ".git")):
        if verbose:
            print("no .git in %s" % root)
        return {}

    GITS = ["git.cmd", "git.exe"] if sys.platform == "win32" else ["git"]
    described = run_command(GITS,
                            ["describe", "--tags", "--dirty", "--always"],
                            cwd=root)
    if described is None:
        return {}
    if not described.startswith(tag_prefix):
        if verbose:
            fmt = "tag '%s' doesn't start with prefix '%s'"
            print(fmt % (described, tag_prefix))
        return {}
    tag = described[len(tag_prefix):]

    head = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if head is None:
        return {}
    full = head.strip()
    if tag.endswith("-dirty"):
        full += "-dirty"
    return {"version": tag, "full": full}
def get_versions(default=None, verbose=False):
    """Return the version dict for this source tree.

    Sources are tried in order: expanded git-archive keywords, a live git
    checkout, the name of the parent directory, and finally ``default``.

    I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    __file__, we can work backwards from there to the root. Some
    py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
    case we can only use expanded keywords.

    :param default: dict returned when nothing else works; when omitted it
        is ``{"version": "unknown", "full": ""}``.
    :param verbose: forwarded to the individual lookup helpers.
    :returns: dict with ``"version"`` and ``"full"`` keys.
    """
    if default is None:
        # Build the fallback per call: a dict literal in the signature would
        # be one shared object, so a caller mutating the returned fallback
        # would silently change it for every later call.
        default = {"version": "unknown", "full": ""}

    keywords = {"refnames": git_refnames, "full": git_full}
    ver = git_versions_from_keywords(keywords, tag_prefix, verbose)
    if ver:
        return ver

    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Invert
        # this to find the root from __file__.
        for _ in range(len(versionfile_source.split('/'))):
            root = os.path.dirname(root)
    except NameError:
        return default

    return (git_versions_from_vcs(tag_prefix, root, verbose)
            or versions_from_parentdir(parentdir_prefix, root, verbose)
            or default)
axe-0.3.1/docs/index.rst 0000664 0000000 0000000 00000001334 12635670016 0015070 0 ustar 00root root 0000000 0000000 .. axe documentation master file, created by
sphinx-quickstart on Fri Jul 25 09:16:47 2014.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to axe's documentation!
===============================
Axe is a read de-multiplexer, useful in situations where sequence reads contain
the barcodes that uniquely distinguish samples. Axe uses a rapid and accurate
algorithm based on hamming mismatch tries to competitively match the prefix of
a sequencing read against a set of barcodes. Axe supports combinatorial
barcoding schemes.
Contents:
.. toctree::
:maxdepth: 2
usage
algorithm
Indices and tables
==================
* :ref:`genindex`
axe-0.3.1/docs/usage.rst 0000664 0000000 0000000 00000012617 12635670016 0015073 0 ustar 00root root 0000000 0000000 *********
Axe Usage
*********
.. note::
For arcane reasons, the name of the ``axe`` binary changed to ``axe-demux``
with version 0.3.0. Apologies for the inconvenience, this was required to
make ``axe`` installable in Debian and its derivatives. Command-line usage
did not change.
Axe has several usage modes. The primary distinction is between the two
alternate barcoding schemes, single and combinatorial barcoding. Single barcode
matching is used when only the first read contains barcode sequences.
Combinatorial barcoding is used when both reads in a read pair contain
independent (typically different) barcode sequences.
For concise reference, the command-line usage of ``axe-demux`` is reproduced
below:
.. literalinclude:: usage.txt
Inputs and Outputs
------------------
Regardless of read mode, three input and output schemes are supported:
single-end reads, paired reads (separate R1 and R2 files) and interleaved
paired reads (one file, with R1 and R2 as consecutive reads). If single end
reads are inputted, they must be output as single end reads. If either paired or
interleaved paired reads are read, they can be output as either paired reads or
interleaved paired reads. This applies to both successfully de-multiplexed reads
and reads that could not be de-multiplexed.
The ``-z`` flag can be used to specify that outputs should be compressed using
gzip compression. The ``-z`` flag takes an integer argument between 0 (the
default) and 9, where 0 indicates plain text output (``gzopen`` mode "wT"), and
1-9 indicate that the respective compression level should be used, where 1 is
fastest and 9 is most compact.
The output flags should be prefixes that are used to generate the output file
name based on the barcode's (or barcode pair's) ID. The names are generated as:
``prefix`` + ``_`` + ``barcode ID`` + ``_`` + ``read number`` + ``.extension``.
The output file for reads that could not be demultiplexed is ``prefix`` + ``_``
+ ``unknown`` + ``_`` + ``read number`` + ``.extension``. The read number is
omitted unless the paired read file scheme is used, and is "il" for interleaved
output. The extension is "fastq"; ".gz" is appended to the extension if the
``-z`` flag is used.
The corresponding CLI flags are:
- ``-f`` and ``-F``: Single end or paired R1 file input and output
respectively.
- ``-r`` and ``-R``: Paired R2 file input and output.
- ``-i`` and ``-I``: Interleaved paired input and output.
The barcode file
----------------
The barcode file is a tab-separated file with an optional header. It is
mandatory, and is always supplied using the ``-b`` command line flag. The exact
format is dependent on barcoding mode, and is described further in the sections
below. If a header is present, the header line must start with either
``Barcode`` or ``barcode``, or it will be interpreted as a barcode line, leading
to a parsing error. Any line starting with ';' or '#' is ignored, allowing
comments to be added in line with barcodes. Please ensure that the software
used to produce the barcode uses ASCII encoding, and does not insert a
Byte-order Mark (BoM) as many text editors can silently use Unicode-based
encoding schemes. I recommend the use of
`LibreOffice Calc <https://www.libreoffice.org/>`_ (part of a free and open source
office suite) to generate barcode tables; Microsoft Excel can also be used.
Mismatch level selection
------------------------
Independent of barcode mode, the ``-m`` flag is used to select the maximum
allowable hamming distance between a read's prefix and a barcode to be
considered as a match. As "mutated" barcodes must be unique, a hamming distance
of one is the default as typically barcodes are designed to differ by a hamming
distance of at least two. Optionally, (using the ``-p`` flag), axe will allow
selective mismatch levels, where, if clashes are observed, the barcode will
only be matched exactly. This allows one to process datasets with barcodes that
don't have a sufficiently high distance between them.
Single barcode mode
-------------------
Single barcode mode is the default mode of operation. Barcodes are matched
against read one (hereafter the forward read), and the barcode is trimmed from
only the forward read, unless the ``-2`` command line flag is given, in which
case a prefix the same length as the matched barcode is also trimmed from the
second or reverse read. Note that the sequence of this second read is not checked
before trimming.
In single barcode mode, the barcode file has two columns: ``Barcode`` and
``ID``.
Combinatorial barcode mode
--------------------------
Combinatorial barcode mode is activated by giving the ``-c`` flag on the
command line. Forward read barcodes are matched against the forward read, and
reverse read barcodes are matched against the reverse read. The optimal
barcodes are selected independently, and the barcode pair is selected from
these two barcodes. The respective barcodes are trimmed from both reads; the
``-2`` command line flag has no effect in combinatorial barcode mode.
In combinatorial barcode mode, the barcode file has three columns:
``Barcode1``, ``Barcode2`` and ``ID``. Individual barcodes can occur many times
within the forward and reverse barcodes, but barcode pairs must be unique
combinations.
The Demultiplexing Statistics File
----------------------------------
The ``-t`` option allows the output of per-sample read counts to a
tab-separated file. The file will have a header describing its format, and
includes a line for unbarcoded reads.
axe-0.3.1/docs/usage.txt 0000664 0000000 0000000 00000002312 12635670016 0015071 0 ustar 00root root 0000000 0000000 USAGE:
axe-demux [-mzc2pt] -b (-f [-r] | -i) (-F [-R] | -I)
axe-demux -h
axe-demux -v
OPTIONS:
-m, --mismatch Maximum hamming distance mismatch. [int, default 1]
-z, --ziplevel Gzip compression level, or 0 for plain text [int, default 0]
-c, --combinatorial Use combinatorial barcode matching. [flag, default OFF]
-p, --permissive Don't error on barcode mismatch conflict, matching only
exactly for conflicting barcodes. [flag, default OFF]
-2, --trim-r2 Trim barcode from R2 read as well as R1. [flag, default OFF]
-b, --barcodes Barcode file. See --help for example. [file]
-f, --fwd-in Input forward read. [file]
-F, --fwd-out Output forward read prefix. [file]
-r, --rev-in Input reverse read. [file]
-R, --rev-out Output reverse read prefix. [file]
-i, --ilfq-in Input interleaved paired reads. [file]
-I, --ilfq-out Output interleaved paired reads prefix. [file]
-t, --table-file Output a summary table of demultiplexing statistics to file. [file]
-h, --help Print this usage plus additional help.
-V, --version Print version string.
-v, --verbose Be more verbose. Additive, -vv is more verbose than -v.
-q, --quiet Be very quiet.
axe-0.3.1/src/ 0000775 0000000 0000000 00000000000 12635670016 0013065 5 ustar 00root root 0000000 0000000 axe-0.3.1/src/CMakeLists.txt 0000664 0000000 0000000 00000002434 12635670016 0015630 0 ustar 00root root 0000000 0000000 # Copyright 2014-2015 Kevin Murray
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Axe library (libaxe.a)
# Sources: the bundled datrie implementation plus axe.c. The bundled GSL
# sources are compiled in only when GSL_FOUND is false.
FILE(GLOB DATRIE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/datrie/*.c)
FILE(GLOB GSL_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/gsl/*.c)
SET(AXELIB_SRCS ${DATRIE_SRCS} axe.c)
IF (NOT GSL_FOUND)
    MESSAGE(STATUS "Using bundled GSL sources")
    LIST(APPEND AXELIB_SRCS ${GSL_SRCS})
ENDIF()

ADD_LIBRARY(axelib STATIC ${AXELIB_SRCS})
# NOTE(review): this file originally spelled the external dependency list
# two ways (AXE_DEP_LIBS here, AXE_DEPENDS_LIBS on the executable), so one
# of them presumably expanded to nothing. Both are listed until the name
# set by the parent CMakeLists.txt is confirmed; an undefined variable
# expands to an empty list and is harmless. TODO: drop the unused spelling.
TARGET_LINK_LIBRARIES(axelib qes_static ${AXE_DEPENDS_LIBS} ${AXE_DEP_LIBS})
SET_TARGET_PROPERTIES(axelib PROPERTIES OUTPUT_NAME axe)

# Executable
ADD_EXECUTABLE(axe-demux main.c)
# The static library goes first so that the libraries it depends on follow
# it on the link line.
TARGET_LINK_LIBRARIES(axe-demux axelib ${AXE_DEPENDS_LIBS})
INSTALL(TARGETS axe-demux DESTINATION "bin")
axe-0.3.1/src/axe.c 0000664 0000000 0000000 00000145125 12635670016 0014016 0 ustar 00root root 0000000 0000000 /*
* ============================================================================
*
* Filename: axe.c
* Description: Demultiplex reads by 5' barcodes
* Copyright: 2014-2015 Kevin Murray
* License: GNU GPL v3+
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
* ============================================================================
*/
#include "axe.h"
#include "gsl_combination.h"
/* Holds the current timestamp, so we don't have to free the returned string
* from now(). */
char _time_now[10] = "";
const char *progress_bar_chars = "|/-\\";
unsigned int format_call_number;
/* Format a log entry as a coloured string with an optional "[x] " marker.
 *
 * The colour and marker character are picked from the entry's level; the
 * progress level cycles through `progress_bar_chars` using the global
 * `format_call_number`.
 *
 * Returns a string allocated by asprintf(), or NULL if `entry` or its
 * message is NULL or formatting fails. */
char *
axe_formatter(struct qes_log_entry *entry)
{
    char *formatted = NULL;
    const char *colour = ANSIRST;
    const char *reset = ANSIRST;
    char marker = ' ';
    int nchars = 0;

    if (entry == NULL || entry->message == NULL) {
        return NULL;
    }

    /* The order of these tests matters: the axe-specific levels are matched
     * by equality before the generic "<=" thresholds are consulted. */
    if (entry->level <= QES_LOG_DEBUG) {
        marker = '.';
        colour = ANSIBEG ATDIM FGCYN BGBLK ANSIEND;
        reset = "";
    } else if (entry->level == AXE_LOG_PROGRESS) {
        /* Cycle through the four progress characters, one per call */
        marker = progress_bar_chars[format_call_number++ % 4];
        colour = ANSIBEG ATNRM FGGRN BGBLK ANSIEND;
    } else if (entry->level == AXE_LOG_BOLD) {
        marker = '\0';
        colour = ANSIBEG ATBLD FGCYN BGBLK ANSIEND;
    } else if (entry->level <= QES_LOG_INFO) {
        marker = '\0';
        colour = ANSIBEG ATNRM FGGRN BGBLK ANSIEND;
    } else if (entry->level <= QES_LOG_WARNING) {
        marker = '!';
        colour = ANSIBEG ATULN FGYEL BGBLK ANSIEND;
    } else if (entry->level <= QES_LOG_ERROR) {
        marker = 'E';
        colour = ANSIBEG ATBLD FGMAG BGBLK ANSIEND;
    } else {
        marker = 'F';
        colour = ANSIBEG ATBLD ATBNK FGRED BGBLK ANSIEND;
    }

    /* A NUL marker means "no [x] prefix" */
    if (marker != '\0') {
        nchars = asprintf(&formatted, "%s[%c] %s%s", colour, marker,
                          entry->message, reset);
    } else {
        nchars = asprintf(&formatted, "%s%s%s", colour, entry->message,
                          reset);
    }
    return (nchars > 0) ? formatted : NULL;
}
/* Axe barcode struct ctor/dtor */
/* Allocate a single `struct axe_barcode` with qes_calloc() and return it. */
struct axe_barcode *
axe_barcode_create(void)
{
    struct axe_barcode *new_barcode = qes_calloc(1, sizeof(*new_barcode));

    return new_barcode;
}
/* Free a barcode and the strings it owns. Does nothing for NULL.
 *
 * Only NULL is rejected here. The previous guard was axe_barcode_ok(), which
 * is defined outside this file; NOTE(review): if it rejects barcodes whose
 * fields are not all set (TODO confirm), then the partially-built barcodes
 * passed in from the parser error paths were never freed. Freeing on the
 * pointer alone releases those as well. This relies on qes_free() accepting
 * NULL members, as axe_config_destroy_() already does for optional fields. */
void
axe_barcode_destroy_(struct axe_barcode *barcode)
{
    if (barcode == NULL) return;
    qes_free(barcode->seq1);
    qes_free(barcode->seq2);
    qes_free(barcode->id);
    barcode->len1 = 0;
    barcode->len2 = 0;
    qes_free(barcode);
}
/* Axe config struct ctor/dtor */
/* Allocate a config struct with qes_calloc() and attach a new logger. */
struct axe_config *
axe_config_create(void)
{
    struct axe_config *cfg = NULL;

    /* No NULL check: per the original author, qes_calloc never returns NULL
     * because errprintexit is used as its error handler. */
    cfg = qes_calloc(1, sizeof(*cfg));
    cfg->logger = qes_logger_create();
    return cfg;
}
/* Tear down a config struct and everything it owns: file name strings,
 * per-barcode outputs, the unknown-read output, barcodes, the barcode lookup
 * table, both tries and finally the logger. Does nothing for NULL. */
void
axe_config_destroy_(struct axe_config *config)
{
    size_t idx = 0;

    if (config == NULL) {
        return;
    }
    /* Log first, while the logger still exists */
    qes_log_message_debug(config->logger, "Destroying config structure\n");

    /* File names */
    qes_free(config->barcode_file);
    qes_free(config->table_file);
    qes_free(config->out_prefixes[0]);
    qes_free(config->out_prefixes[1]);
    qes_free(config->infiles[0]);
    qes_free(config->infiles[1]);

    /* Outputs: one per barcode pair, plus the output for unknown reads */
    for (idx = 0; config->outputs != NULL && idx < config->n_barcode_pairs;
         idx++) {
        axe_output_destroy(config->outputs[idx]);
    }
    qes_free(config->outputs);
    axe_output_destroy(config->unknown_output);

    /* Barcode pairs */
    for (idx = 0; config->barcodes != NULL && idx < config->n_barcode_pairs;
         idx++) {
        axe_barcode_destroy(config->barcodes[idx]);
    }
    qes_free(config->barcodes);

    /* Barcode lookup: one row per R1 barcode */
    for (idx = 0;
         config->barcode_lookup != NULL && idx < config->n_barcodes_1;
         idx++) {
        qes_free(config->barcode_lookup[idx]);
    }
    qes_free(config->barcode_lookup);

    /* Tries */
    axe_trie_destroy(config->fwd_trie);
    axe_trie_destroy(config->rev_trie);

    /* Logger */
    qes_logger_destroy(config->logger);

    /* The config struct itself */
    qes_free(config);
}
static char *
_axe_format_outfile_path (const char *prefix, const char *id, int read,
const char *ext)
{
char buf[4096];
int res = 0;
char *our_prefix = NULL;
char lastchr = '\0';
size_t prefix_len = 0;
if (prefix == NULL || id == NULL) {
return NULL;
}
prefix_len = strlen(prefix);
lastchr = prefix[prefix_len - 1];
if (lastchr == '/' || lastchr == '\\') {
/* Our prefix is a directory, don't add '_' */
our_prefix = strdup(prefix);
} else {
/* Duplicate and append an underscore to prefix */
our_prefix = qes_malloc(prefix_len + 2);
our_prefix[prefix_len + 1] = '\0';
strncpy(our_prefix, prefix, prefix_len);
our_prefix[prefix_len] = '_';
}
if (read > 0) {
res = snprintf(buf, 4096, "%s%s_R%d.%s", our_prefix, id, read, ext);
} else {
res = snprintf(buf, 4096, "%s%s_il.%s", our_prefix, id, ext);
}
if (res >= 4096) {
qes_free(our_prefix);
return NULL;
}
qes_free(our_prefix);
return strndup(buf, 4096);
}
/* Open the output sequence file(s) for one destination.
 *
 * `fwd_fpath` is required. `rev_fpath` is required when `mode` is
 * READS_PAIRED and is opened whenever it is non-NULL. Both files are opened
 * with `fp_mode` and set to FASTQ format.
 *
 * Returns the new output, or NULL on invalid arguments or if either file
 * could not be created. */
struct axe_output *
axe_output_create(const char *fwd_fpath, const char *rev_fpath,
                  enum read_mode mode, const char *fp_mode)
{
    struct axe_output *output = NULL;

    /* Argument validation */
    if (mode == READS_UNKNOWN) return NULL;
    if (fwd_fpath == NULL) return NULL;
    if (mode == READS_PAIRED && rev_fpath == NULL) return NULL;

    output = qes_calloc(1, sizeof(*output));
    output->mode = mode;

    /* Forward (or single/interleaved) file */
    output->fwd_file = qes_seqfile_create(fwd_fpath, fp_mode);
    if (output->fwd_file == NULL) {
        qes_free(output);
        return NULL;
    }
    qes_seqfile_set_format(output->fwd_file, FASTQ_FMT);

    /* Reverse file, only if a path was given */
    if (rev_fpath == NULL) {
        output->rev_file = NULL;
        return output;
    }
    output->rev_file = qes_seqfile_create(rev_fpath, fp_mode);
    if (output->rev_file == NULL) {
        /* Undo the forward file so nothing is left half-open */
        qes_seqfile_destroy(output->fwd_file);
        qes_free(output);
        return NULL;
    }
    qes_seqfile_set_format(output->rev_file, FASTQ_FMT);
    return output;
}
/* Destroy both sequence files of an output and free the output itself.
 * Does nothing for NULL. */
void
axe_output_destroy_(struct axe_output *output)
{
    if (output == NULL) {
        return;
    }
    qes_seqfile_destroy(output->fwd_file);
    qes_seqfile_destroy(output->rev_file);
    output->mode = READS_UNKNOWN;
    qes_free(output);
}
/* Parse one combinatorial barcode line: three whitespace-separated fields
 * (R1 barcode, R2 barcode, ID), each at most 99 characters.
 *
 * Returns a new barcode, or NULL if `line` is NULL, fewer than three fields
 * were read, or an allocation failed. */
static inline struct axe_barcode *
read_barcode_combo(char *line)
{
    char fwd_seq[100] = "";
    char rev_seq[100] = "";
    char name[100] = "";
    int nfields = 0;
    struct axe_barcode *bcd = NULL;

    if (line == NULL) {
        return NULL;
    }
    nfields = sscanf(line, "%99s\t%99s\t%99s", fwd_seq, rev_seq, name);
    if (nfields < 3) {
        return NULL;
    }
    bcd = axe_barcode_create();
    if (bcd == NULL) {
        return NULL;
    }

    /* Heap copy of the R1 barcode sequence */
    bcd->seq1 = strndup(fwd_seq, 100);
    if (bcd->seq1 == NULL) goto fail;
    bcd->len1 = strnlen(fwd_seq, 100);

    /* Heap copy of the R2 barcode sequence */
    bcd->seq2 = strndup(rev_seq, 100);
    if (bcd->seq2 == NULL) goto fail;
    bcd->len2 = strnlen(rev_seq, 100);

    /* Heap copy of the ID */
    bcd->id = strndup(name, 100);
    if (bcd->id == NULL) goto fail;
    bcd->idlen = strnlen(name, 100);

    return bcd;

fail:
    axe_barcode_destroy(bcd);
    return NULL;
}
/* Parse one single-barcode line: two whitespace-separated fields (barcode,
 * ID), each at most 99 characters.
 *
 * Returns a new barcode, or NULL if `line` is NULL, fewer than two fields
 * were read, or an allocation failed. */
static inline struct axe_barcode *
read_barcode_single(char *line)
{
    char fwd_seq[100] = "";
    char name[100] = "";
    int nfields = 0;
    struct axe_barcode *bcd = NULL;

    if (line == NULL) {
        return NULL;
    }
    nfields = sscanf(line, "%99s\t%99s", fwd_seq, name);
    if (nfields < 2) {
        return NULL;
    }
    bcd = axe_barcode_create();
    if (bcd == NULL) {
        return NULL;
    }

    /* Heap copy of the R1 barcode sequence */
    bcd->seq1 = strndup(fwd_seq, 100);
    if (bcd->seq1 == NULL) goto fail;
    bcd->len1 = strnlen(fwd_seq, 100);

    /* Heap copy of the ID */
    bcd->id = strndup(name, 100);
    if (bcd->id == NULL) goto fail;
    bcd->idlen = strnlen(name, 100);

    return bcd;

fail:
    axe_barcode_destroy(bcd);
    return NULL;
}
/* Read the barcode file named in `config->barcode_file` into
 * `config->barcodes` and set `config->n_barcode_pairs`.
 *
 * Lines starting with "Barcode"/"barcode" (an optional header) or with '#'
 * or ';' (comments) are skipped. Every other line is parsed as a single or
 * combinatorial barcode depending on `config->match_combo`. Characters in a
 * barcode ID that are unsuitable for file names are replaced with '-'.
 *
 * Returns 0 on success, -1 if `config` fails axe_config_ok(), and 1 if the
 * file could not be opened or a line could not be parsed. On failure
 * nothing is stored in `config` and everything read so far is freed. */
int
axe_read_barcodes(struct axe_config *config)
{
    struct qes_file *qf = NULL;
    struct axe_barcode *this_barcode = NULL;
    struct axe_barcode **barcodes = NULL;
    size_t n_barcode_pairs = 0; /* Entries in file */
    size_t n_barcodes_alloced = 8;
    const char *bad_fname_chars = "'\"!@#$%^&*()+=~`[]{}\\|;:/?><,";
    char *line = NULL;
    char *tmp = NULL;
    size_t linesz = 128;
    ssize_t linelen = 0;
    size_t iii = 0;

    if (!axe_config_ok(config)) {
        return -1;
    }
    barcodes = qes_calloc(n_barcodes_alloced, sizeof(*barcodes));
    line = qes_malloc(linesz);
    qf = qes_file_open(config->barcode_file, "r");
    if (qf == NULL) {
        /* NOTE(review): assumes qes_file_open() reports failure by returning
         * NULL -- confirm against libqes. The check is inert otherwise. */
        qes_log_format_fatal(config->logger,
                             "Couldn't open barcode file '%s'\n",
                             config->barcode_file);
        goto error;
    }
    while ((linelen = qes_file_readline_realloc(qf, &line, &linesz)) > 0) {
        /* Skip an optional header line */
        if (strncmp(line, "Barcode", 7) == 0 || \
                strncmp(line, "barcode", 7) == 0) {
            continue;
        }
        /* Skip #-comment or ;-comment */
        if (line[0] == '#' || line[0] == ';') {
            continue;
        }
        /* Reallocate the array if we need to */
        if (n_barcode_pairs == n_barcodes_alloced) {
            n_barcodes_alloced *= 2;
            barcodes = qes_realloc(barcodes,
                                   n_barcodes_alloced * sizeof(*barcodes));
        }
        /* Read the barcode line into a ``struct axe_barcode`` */
        if (config->match_combo) {
            this_barcode = read_barcode_combo(line);
        } else {
            this_barcode = read_barcode_single(line);
        }
        if (this_barcode == NULL) {
            qes_log_format_fatal(config->logger,
                                 "Couldn't parse barcode line '%s'\n", line);
            /* Newline-terminated like the other log messages, so the next
             * message does not run on from this one. */
            qes_log_message_warning(config->logger,
                                    "Check that the format is correct and has "
                                    "UNIX line endings.\n");
            goto error;
        }
        /* Replace all bad chars with '-' */
        tmp = strpbrk(this_barcode->id, bad_fname_chars);
        while (tmp != NULL) {
            *tmp = '-'; /* Replace with dash */
            tmp = strpbrk(tmp + 1, bad_fname_chars);
        }
        /* Add the barcode to the array */
        barcodes[n_barcode_pairs++] = this_barcode;
    }
    /* Save the array to the config struct */
    config->barcodes = barcodes;
    config->n_barcode_pairs = n_barcode_pairs;
    qes_file_close(qf);
    qes_free(line);
    if (config->verbosity > 0) {
        qes_log_format_info(config->logger,
                            "read_barcodes -- (%s) Read in barcodes\n",
                            nowstr());
    }
    return 0;

error:
    if (barcodes != NULL) {
        for (iii = 0; iii < n_barcode_pairs; iii++) {
            axe_barcode_destroy(barcodes[iii]);
        }
    }
    /* The array itself is local until the success path hands it to config,
     * so it has to be released here too. */
    qes_free(barcodes);
    qes_file_close(qf);
    qes_free(line);
    return 1;
}
/* Build the barcode lookup table for single-barcode mode: one row per
 * barcode, each row holding a single entry equal to its own index.
 *
 * Returns 0 on success, -1 if `config` fails axe_config_ok(). */
static int
setup_barcode_lookup_single(struct axe_config *config)
{
    size_t bcd = 0;

    if (!axe_config_ok(config)) {
        return -1;
    }
    /* Every barcode is an R1 barcode; there are no R2 barcodes */
    config->n_barcodes_1 = config->n_barcode_pairs;
    config->n_barcodes_2 = 0;

    config->barcode_lookup = qes_malloc(
            config->n_barcodes_1 * sizeof(*config->barcode_lookup));
    for (bcd = 0; bcd < config->n_barcode_pairs; bcd++) {
        /* A one-element row mapping the barcode to itself */
        config->barcode_lookup[bcd] = qes_malloc(
                sizeof(**config->barcode_lookup));
        config->barcode_lookup[bcd][0] = bcd;
    }
    return 0;
}
/* Build the barcode lookup table for combinatorial mode.
 *
 * Each distinct R1 and R2 barcode sequence is given a dense index, in order
 * of first appearance. `config->barcode_lookup[r1][r2]` is then set to the
 * index of the barcode pair with those sequences; all other cells are
 * filled with 0xFF bytes by memset(..., -1, ...).
 *
 * Returns 0 on success, -1 if `config` fails axe_config_ok(), and 1 if a
 * barcode fails axe_barcode_ok() or a sequence cannot be found again. */
static int
setup_barcode_lookup_combo(struct axe_config *config)
{
    struct axe_barcode *this_barcode = NULL;
    size_t n_barcodes_1 = 0; /* R1 barcodes */
    size_t n_barcodes_2 = 0;
    size_t iii = 0;
    size_t row = 0;
    intptr_t tmp = 0;
    /* Lookup results are received in real intptr_t objects rather than by
     * casting a size_t* to intptr_t*, which is type punning and breaks
     * wherever the two types differ in size. */
    intptr_t idx1 = 0;
    intptr_t idx2 = 0;
    int ret = -1;
    int res = 0;
    struct axe_trie *seq1_trie = NULL;
    struct axe_trie *seq2_trie = NULL;

    if (!axe_config_ok(config)) {
        return -1;
    }
    /* Make "hash table" of barcode => unique bcd num. We use tries, as they
       work fine as an associative map, and we've already got the headers, lib
       etc in the system. */
    seq1_trie = axe_trie_create();
    seq2_trie = axe_trie_create();
    assert(seq1_trie != NULL && seq2_trie != NULL);
    for (iii = 0; iii < config->n_barcode_pairs; iii++) {
        this_barcode = config->barcodes[iii];
        if (!axe_barcode_ok(this_barcode)) {
            qes_log_format_fatal(config->logger,
                                 "setup_lookup -- Bad barcode at %zu\n",
                                 iii);
            goto error;
        }
        /* Only sequences not seen before get a new index */
        if (!axe_trie_get(seq1_trie, this_barcode->seq1, &tmp)) {
            axe_trie_add(seq1_trie, this_barcode->seq1, n_barcodes_1++);
        }
        if (!axe_trie_get(seq2_trie, this_barcode->seq2, &tmp)) {
            axe_trie_add(seq2_trie, this_barcode->seq2, n_barcodes_2++);
        }
    }
    config->n_barcodes_1 = n_barcodes_1;
    config->n_barcodes_2 = n_barcodes_2;
    /* Make barcode lookup */
    config->barcode_lookup = qes_malloc(n_barcodes_1 *
                                        sizeof(*config->barcode_lookup));
    for (row = 0; row < config->n_barcodes_1; row++) {
        config->barcode_lookup[row] = qes_calloc(n_barcodes_2,
                sizeof(**config->barcode_lookup));
        memset(config->barcode_lookup[row], -1,
               n_barcodes_2 * sizeof(**config->barcode_lookup));
    }
    /* Setup barcode lookup */
    for (iii = 0; iii < config->n_barcode_pairs; iii++) {
        this_barcode = config->barcodes[iii];
        /* already checked barcode above */
        res = axe_trie_get(seq1_trie, this_barcode->seq1, &idx1);
        if (!res) goto error;
        res = axe_trie_get(seq2_trie, this_barcode->seq2, &idx2);
        if (!res) goto error;
        config->barcode_lookup[idx1][idx2] = iii;
    }
    ret = 0;

exit:
    /* The tries are only needed while building the table */
    axe_trie_destroy(seq1_trie);
    axe_trie_destroy(seq2_trie);
    return ret;

error:
    ret = 1;
    goto exit;
}
/*
 * Build config->barcode_lookup using the strategy selected by
 * config->match_combo (combinatorial or single barcodes).
 *
 * Returns -1 if `config` fails axe_config_ok(), otherwise whatever the
 * selected setup function returns.
 */
int
axe_setup_barcode_lookup(struct axe_config *config)
{
    if (axe_config_ok(config)) {
        return config->match_combo ? setup_barcode_lookup_combo(config)
                                   : setup_barcode_lookup_single(config);
    }
    return -1;
}
/*
 * Create the (empty) barcode tries: the forward trie always, the reverse
 * trie only when combinatorial matching is enabled.
 *
 * Returns -1 if `config` fails axe_config_ok(), 1 if a trie could not be
 * created (any trie made so far is destroyed again) and 0 on success.
 */
int
axe_make_tries(struct axe_config *config)
{
    int failed = 0;

    if (!axe_config_ok(config)) {
        return -1;
    }
    config->fwd_trie = axe_trie_create();
    failed = (config->fwd_trie == NULL);
    if (!failed && config->match_combo) {
        config->rev_trie = axe_trie_create();
        failed = (config->rev_trie == NULL);
    }
    if (!failed) {
        return 0;
    }
    qes_log_message_fatal(
            config->logger,
            "make_tries -- ERROR: axe_trie_create returned NULL\n");
    axe_trie_destroy(config->fwd_trie);
    if (config->match_combo) {
        axe_trie_destroy(config->rev_trie);
    }
    return 1;
}
/*
 * Pick the output file extension: "fastq.gz" when the output compression
 * level is between 1 and 9 inclusive, plain "fastq" otherwise.
 *
 * Returns a strdup()'d string, or NULL if `config` fails axe_config_ok().
 */
static char *
axe_make_file_ext(const struct axe_config *config)
{
    const char *ext = "fastq";

    if (!axe_config_ok(config)) {
        return NULL;
    }
    if (config->out_compress_level >= 1 && config->out_compress_level <= 9) {
        ext = "fastq.gz";
    }
    return strdup(ext);
}
/*
 * Build the file-open mode string for output files: "w" followed by the
 * compression level digit when the level is between 1 and 9 inclusive,
 * "wT" otherwise.
 *
 * Returns a strdup()'d string, or NULL if `config` fails axe_config_ok().
 */
static char *
axe_make_zmode(const struct axe_config *config)
{
    /* Two mode characters plus the terminator */
    char mode[3] = "wT";

    if (!axe_config_ok(config)) {
        return NULL;
    }
    if (config->out_compress_level >= 1 && config->out_compress_level <= 9) {
        snprintf(mode, sizeof(mode), "w%d", config->out_compress_level);
    }
    return strdup(mode);
}
/*
 * Load the forward and reverse barcode tries for combinatorial matching.
 *
 * Each distinct R1 sequence is stored in config->fwd_trie (and each distinct
 * R2 sequence in config->rev_trie) under a number assigned in order of first
 * appearance. Every sequence within 1..config->mismatches substitutions of
 * a barcode is then stored under the same number. If a mutated sequence is
 * already present, the load fails unless config->permissive is set, in which
 * case the ambiguous sequence is removed from the trie instead.
 *
 * Returns -1 on a bad config or barcode, 1 on any other failure, and 0 on
 * success.
 */
static inline int
load_tries_combo(struct axe_config *config)
{
    int bcd1 = -1;
    int bcd2 = -1;
    /* Assume failure until we reach the end; every error path just jumps to
       `exit`, so a forgotten assignment can never report success. */
    int retval = 1;
    char **mutated = NULL;
    size_t num_mutated = 0;
    size_t iii = 0;
    size_t jjj = 0;
    size_t mmm = 0;
    struct axe_barcode *this_bcd = NULL;
    intptr_t tmp = 0;

    if (!axe_config_ok(config)) {
        fprintf(stderr, "[load_tries] Bad config\n");
        return -1;
    }
    /* Make mutated barcodes and add to trie */
    for (iii = 0; iii < config->n_barcode_pairs; iii++) {
        this_bcd = config->barcodes[iii];
        if (!axe_barcode_ok(this_bcd)) {
            qes_log_format_fatal(config->logger,
                    "load_tries -- Bad R1 barcode at %zu\n", iii);
            retval = -1;
            goto exit;
        }
        /* Either lookup the index of the first read in the barcode table, or
         * insert this barcode into the table, storing its index.
         * Note the NOT here. */
        if (axe_trie_get(config->fwd_trie, this_bcd->seq1, &tmp)) {
            /* Already loaded (shared between several pairs) */
            continue;
        }
        if (axe_trie_add(config->fwd_trie, this_bcd->seq1, ++bcd1) != 0) {
            qes_log_format_fatal(config->logger,
                    "load_tries -- Could not load barcode %s into trie %zu\n",
                    this_bcd->seq1, iii);
            goto exit;
        }
        for (jjj = 1; jjj <= config->mismatches; jjj++) {
            /* Do the forwards read barcode */
            num_mutated = 0;
            mutated = hamming_mutate_dna(&num_mutated, this_bcd->seq1,
                                         this_bcd->len1, jjj, 0);
            if (mutated == NULL) {
                goto exit;
            }
            for (mmm = 0; mmm < num_mutated; mmm++) {
                if (axe_trie_add(config->fwd_trie, mutated[mmm], bcd1) != 0) {
                    if (!config->permissive) {
                        qes_log_format_fatal(config->logger,
                                "load_tries -- Barcode %s already in fwd trie (%dmm) %s\n",
                                mutated[mmm], (int)jjj, this_bcd->seq1);
                        goto exit;
                    }
                    if (config->verbosity >= 0) {
                        qes_log_format_warning(config->logger,
                                "load_tries -- warning: Will only match to %dmm\n",
                                (int)jjj - 1);
                    }
                    /* Ambiguous between two barcodes: match neither */
                    axe_trie_delete(config->fwd_trie, mutated[mmm]);
                }
                qes_free(mutated[mmm]);
            }
            qes_free(mutated);
        }
    }
    /* Ditto for the reverse read */
    for (iii = 0; iii < config->n_barcode_pairs; iii++) {
        this_bcd = config->barcodes[iii];
        /* The first pass only validated the R1 half of each barcode */
        if (!axe_barcode_ok_combo(this_bcd)) {
            qes_log_format_fatal(config->logger,
                    "load_tries -- Bad R2 barcode at %zu\n", iii);
            retval = -1;
            goto exit;
        }
        /* Likewise for the reverse read index */
        if (axe_trie_get(config->rev_trie, this_bcd->seq2, &tmp)) {
            continue;
        }
        if (axe_trie_add(config->rev_trie, this_bcd->seq2, ++bcd2) != 0) {
            qes_log_format_fatal(config->logger,
                    "load_tries -- Could not load barcode %s into trie %zu\n",
                    this_bcd->seq2, iii);
            goto exit;
        }
        for (jjj = 1; jjj <= config->mismatches; jjj++) {
            num_mutated = 0;
            mutated = hamming_mutate_dna(&num_mutated, this_bcd->seq2,
                                         this_bcd->len2, jjj, 0);
            if (mutated == NULL) {
                goto exit;
            }
            for (mmm = 0; mmm < num_mutated; mmm++) {
                if (axe_trie_add(config->rev_trie, mutated[mmm], bcd2) != 0) {
                    if (!config->permissive) {
                        qes_log_format_fatal(config->logger,
                                "load_tries -- Barcode %s already in rev trie (%dmm) %s\n",
                                mutated[mmm], (int)jjj, this_bcd->seq2);
                        goto exit;
                    }
                    if (config->verbosity >= 0) {
                        qes_log_format_warning(config->logger,
                                "load_tries -- Will only match %s to %dmm\n",
                                this_bcd->id, (int)jjj - 1);
                    }
                    axe_trie_delete(config->rev_trie, mutated[mmm]);
                }
                qes_free(mutated[mmm]);
            }
            qes_free(mutated);
        }
    }
    /* we got here, so we succeeded. set retval accordingly */
    retval = 0;
exit:
    /* Only non-NULL if we bailed out part-way through a batch of mutated
       barcodes; entries already released in the loop are skipped here
       because the loop's qes_free() leaves them NULL (the success path of
       this function already depends on that). */
    if (mutated != NULL) {
        for (mmm = 0; mmm < num_mutated; mmm++) {
            qes_free(mutated[mmm]);
        }
        qes_free(mutated);
    }
    return retval;
}
/*
 * Load the forward trie for single-barcode matching.
 *
 * Each barcode is stored in config->fwd_trie under its index in
 * config->barcodes; a barcode sequence that is already present is an error.
 * Every sequence within 1..config->mismatches substitutions of the barcode
 * is then stored under the same index. If a mutated sequence is already
 * present, the load fails unless config->permissive is set, in which case
 * the ambiguous sequence is removed from the trie instead.
 *
 * Returns -1 on a bad config or barcode, 1 on any other failure, and 0 on
 * success.
 */
static inline int
load_tries_single(struct axe_config *config)
{
    char **mutated = NULL;
    size_t num_mutated = 0;
    int ret = 0;
    size_t iii = 0;
    size_t jjj = 0;
    size_t mmm = 0;
    intptr_t tmp = 0;
    /* Failure unless we get all the way to the end */
    int retval = 1;
    struct axe_barcode *this_bcd = NULL;

    if (!axe_config_ok(config)) {
        fprintf(stderr, "[load_tries] Bad config\n");
        return -1;
    }
    /* Make mutated barcodes and add to trie */
    for (iii = 0; iii < config->n_barcode_pairs; iii++) {
        this_bcd = config->barcodes[iii];
        if (!axe_barcode_ok(this_bcd)) {
            fprintf(stderr, "[load_tries] Bad barcode at %zu\n", iii);
            return -1;
        }
        /* Either lookup the index of the first read in the barcode table, or
         * insert this barcode into the table, storing its index.
         * Note the NOT here. */
        if (!axe_trie_get(config->fwd_trie, this_bcd->seq1, &tmp)) {
            ret = axe_trie_add(config->fwd_trie, this_bcd->seq1, (int)iii);
            if (ret != 0) {
                fprintf(stderr,
                        "ERROR: Could not load barcode %s into trie %zu\n",
                        this_bcd->seq1, iii);
                return 1;
            }
        } else {
            fprintf(stderr, "ERROR: Duplicate barcode %s\n", this_bcd->seq1);
            return 1;
        }
        for (jjj = 1; jjj <= config->mismatches; jjj++) {
            num_mutated = 0;
            mutated = hamming_mutate_dna(&num_mutated, this_bcd->seq1,
                                         this_bcd->len1, jjj, 0);
            if (mutated == NULL) {
                /* A failure, not a parameter error: report it as 1 */
                goto exit;
            }
            for (mmm = 0; mmm < num_mutated; mmm++) {
                ret = axe_trie_add(config->fwd_trie, mutated[mmm], iii);
                if (ret != 0) {
                    if (config->permissive) {
                        if (config->verbosity >= 0) {
                            fprintf(stderr,
                                    "[%s] warning: Will only match to %dmm\n",
                                    __func__, (int)jjj - 1);
                        }
                        /* Ambiguous between two barcodes: match neither */
                        axe_trie_delete(config->fwd_trie, mutated[mmm]);
                        qes_free(mutated[mmm]);
                        continue;
                    }
                    fprintf(stderr,
                            "[%s] ERROR: Barcode %s already in trie (%dmm)\n",
                            __func__, mutated[mmm], (int)jjj);
                    goto exit;
                }
                qes_free(mutated[mmm]);
            }
            qes_free(mutated);
            num_mutated = 0;
        }
    }
    /* we got here, so we succeeded */
    retval = 0;
exit:
    /* Only non-NULL if we bailed out part-way through a batch of mutated
       barcodes */
    if (mutated != NULL) {
        for (mmm = 0; mmm < num_mutated; mmm++) {
            qes_free(mutated[mmm]);
        }
        qes_free(mutated);
    }
    return retval;
}
int
axe_make_outputs(struct axe_config *config)
{
size_t iii = 0;
char *name_fwd = NULL;
char *name_rev = NULL;
char *file_ext = NULL;
char *zmode = NULL;
struct axe_barcode *this_bcd = NULL;
if (!axe_config_ok(config)) {
fprintf(stderr, "[make_outputs] Bad config\n");
return -1;
}
file_ext = axe_make_file_ext(config);
zmode = axe_make_zmode(config);
config->outputs = qes_calloc(config->n_barcode_pairs,
sizeof(*config->outputs));
/* For each sample, make the filename, make an output */
for (iii = 0; iii < config->n_barcode_pairs; iii++) {
this_bcd = config->barcodes[iii];
/* Open barcode files */
switch (config->out_mode) {
case READS_SINGLE:
name_fwd = _axe_format_outfile_path(config->out_prefixes[0],
this_bcd->id, 1, file_ext);
name_rev = NULL;
break;
case READS_PAIRED:
name_fwd = _axe_format_outfile_path(config->out_prefixes[0],
this_bcd->id, 1, file_ext);
name_rev = _axe_format_outfile_path(config->out_prefixes[1],
this_bcd->id, 2, file_ext);
break;
case READS_INTERLEAVED:
name_fwd = _axe_format_outfile_path(config->out_prefixes[0],
this_bcd->id, 0, file_ext);
name_rev = NULL;
break;
case READS_UNKNOWN:
default:
fprintf(stderr, "[make_outputs] Error: bad output mode %ui\n",
config->out_mode);
goto error;
}
config->outputs[iii] = axe_output_create(name_fwd, name_rev,
config->out_mode, zmode);
if (config->outputs[iii] == NULL) {
fprintf(stderr, "[make_outputs] couldn't create file at %s\n",
name_fwd);
goto error;
}
qes_free(name_fwd);
qes_free(name_rev);
}
/* Generate the unknown file in the same manner, using id == unknown */
switch (config->out_mode) {
case READS_SINGLE:
name_fwd = _axe_format_outfile_path(config->out_prefixes[0],
"unknown", 1, file_ext);
name_rev = NULL;
break;
case READS_PAIRED:
name_fwd = _axe_format_outfile_path(config->out_prefixes[0],
"unknown", 1, file_ext);
name_rev = _axe_format_outfile_path(config->out_prefixes[1],
"unknown", 2, file_ext);
break;
case READS_INTERLEAVED:
name_fwd = _axe_format_outfile_path(config->out_prefixes[0],
"unknown", 0, file_ext);
name_rev = NULL;
break;
case READS_UNKNOWN:
default:
fprintf(stderr, "[make_outputs] Error: bad output mode %ui\n",
config->out_mode);
goto error;
}
config->unknown_output = axe_output_create(name_fwd, name_rev,
config->out_mode, zmode);
if (config->unknown_output == NULL) {
fprintf(stderr, "[make_outputs] couldn't create file at %s\n",
name_fwd);
goto error;
}
qes_free(file_ext);
qes_free(zmode);
return 0;
error:
qes_free(name_fwd);
qes_free(name_rev);
qes_free(file_ext);
qes_free(zmode);
return 1;
}
/*
 * Fill the barcode tries using the loader selected by config->match_combo.
 * When config->verbosity is above zero a timestamped message is printed to
 * stderr afterwards.
 *
 * Returns -1 if `config` fails axe_config_ok(), otherwise whatever the
 * selected loader returns.
 */
int
axe_load_tries(struct axe_config *config)
{
    int status;

    if (!axe_config_ok(config)) {
        return -1;
    }
    status = config->match_combo ? load_tries_combo(config)
                                 : load_tries_single(config);
    if (config->verbosity > 0) {
        fprintf(stderr, "[load_tries] (%s) Barcode tries loaded\n",
                nowstr());
    }
    return status;
}
/*
 * Write a demultiplexed read pair to `out` with the barcodes removed.
 *
 * The first `bcd1_len` bases of seq1 and the first `bcd2_len` bases of seq2
 * are skipped. A read no longer than its barcode is replaced by a single
 * 'N' (keeping its first quality character) and written untrimmed. seq1
 * always goes to the forward file; seq2 goes to the forward file for
 * interleaved output and to the reverse file for paired output.
 *
 * The reads' string pointers and lengths are shifted for the duration of
 * each write and are always shifted back before returning, including on
 * error.
 *
 * Returns 0 on success and 1 if a write failed.
 */
static inline int
write_barcoded_read_combo(struct axe_output *out, struct qes_seq *seq1,
                          struct qes_seq *seq2, size_t bcd1_len,
                          size_t bcd2_len)
{
    int ret = 0;
    int retval = 0;

    if (seq1->seq.len <= bcd1_len) {
        /* Truncate seqs to N */
        seq1->seq.str[0] = 'N';
        seq1->seq.str[1] = '\0';
        seq1->seq.len = 1;
        /* Keep first qual 'base' */
        seq1->qual.str[1] = '\0';
        seq1->qual.len = 1;
        /* Nothing left to trim: skipping the barcode length now would run
           past the one-base read and underflow its length. */
        bcd1_len = 0;
    }
    if (seq2->seq.len <= bcd2_len) {
        /* Truncate seqs to N */
        seq2->seq.str[0] = 'N';
        seq2->seq.str[1] = '\0';
        seq2->seq.len = 1;
        /* Keep first qual 'base' */
        seq2->qual.str[1] = '\0';
        seq2->qual.len = 1;
        bcd2_len = 0;
    }
    /* Bit of the ol' switcheroo. We keep the seq's char pointers, so we
       need to switch them back to their orig. values, but don't want to
       copy. Kludgy, I know. */
    seq1->seq.str += bcd1_len;
    seq1->seq.len -= bcd1_len;
    seq1->qual.str += bcd1_len;
    seq1->qual.len -= bcd1_len;
    ret = qes_seqfile_write(out->fwd_file, seq1);
    seq1->seq.str -= bcd1_len;
    seq1->seq.len += bcd1_len;
    seq1->qual.str -= bcd1_len;
    seq1->qual.len += bcd1_len;
    if (ret < 1) {
        fprintf(stderr,
                "[process_file] Error: writing to fwd file %s failed\n%s\n",
                out->fwd_file->qf->path,
                qes_file_error(out->fwd_file->qf));
        return 1;
    }
    seq2->seq.str += bcd2_len;
    seq2->seq.len -= bcd2_len;
    seq2->qual.str += bcd2_len;
    seq2->qual.len -= bcd2_len;
    if (out->mode == READS_INTERLEAVED) {
        ret = qes_seqfile_write(out->fwd_file, seq2);
        if (ret < 1) {
            fprintf(stderr,
                    "[process_file] Error: writing to il file %s failed\n%s\n",
                    out->fwd_file->qf->path,
                    qes_file_error(out->fwd_file->qf));
            retval = 1;
        }
    } else if (out->mode == READS_PAIRED) {
        ret = qes_seqfile_write(out->rev_file, seq2);
        if (ret < 1) {
            fprintf(stderr,
                    "process_file -- Error: writing to rev file %s failed\n%s\n",
                    out->rev_file->qf->path,
                    qes_file_error(out->rev_file->qf));
            retval = 1;
        }
    }
    /* Shift back on every path, so the caller never sees moved pointers */
    seq2->seq.str -= bcd2_len;
    seq2->seq.len += bcd2_len;
    seq2->qual.str -= bcd2_len;
    seq2->qual.len += bcd2_len;
    return retval;
}
/*
 * Count one more processed read (or read pair) and, after every 100000th
 * one, emit a progress message unless config->verbosity is negative.
 */
static inline void
increment_reads_print_progress(struct axe_config *config)
{
    const char *unit = NULL;

    config->reads_processed += 1;
    if (config->reads_processed % 100000 != 0 || config->verbosity < 0) {
        return;
    }
    unit = (config->out_mode == READS_SINGLE) ? "reads" : "read pairs";
    axe_format_progress(config->logger,
                        "%s: Processed %.1fM %s\r",
                        nowstr(),
                        (float)(config->reads_processed/1000000.0),
                        unit);
}
/*
 * Demultiplex one read (seq2 == NULL) or read pair using single barcodes.
 *
 * The barcode is matched against the start of seq1 only. Unmatched reads go
 * to config->unknown_output untouched. Matched reads are written to their
 * barcode's output with the barcode removed from seq1 and, when
 * config->trim_rev is set, the same number of bases removed from seq2. A
 * read whose seq1 is no longer than the barcode is counted but not written.
 * If seq2 is too short to be trimmed it is written untrimmed.
 *
 * The reads' string pointers and lengths are shifted for the duration of
 * each write and are always shifted back before returning.
 *
 * Returns 0 on success (matched or not) and 1 on a failed write or an
 * out-of-range barcode index.
 */
static inline int
process_read_pair_single(struct axe_config *config, struct qes_seq *seq1,
                         struct qes_seq *seq2)
{
    int ret = 0;
    int retval = 0;
    ssize_t bcd = -1;
    size_t barcode_pair_index = 0;
    struct axe_output *outfile = NULL;
    size_t bcd_len = 0;
    size_t rev_trim = 0;

    ret = axe_match_read(config, &bcd, config->fwd_trie, seq1);
    increment_reads_print_progress(config);
    if (ret != 0) {
        /* No match */
        qes_seqfile_write(config->unknown_output->fwd_file, seq1);
        if (seq2 != NULL) {
            if (config->out_mode == READS_INTERLEAVED) {
                qes_seqfile_write(config->unknown_output->fwd_file, seq2);
            } else {
                qes_seqfile_write(config->unknown_output->rev_file, seq2);
            }
        }
        config->reads_failed++;
        return 0;
    }
    /* The lookup table has n_barcodes_1 rows; refuse anything outside it
       rather than reading past the end of the table. */
    if (bcd < 0 || (size_t)bcd >= config->n_barcodes_1) {
        qes_log_format_fatal(config->logger,
                "process_file -- Matched barcode index %zd is out of range\n",
                bcd);
        return 1;
    }
    /* Found a match */
    config->reads_demultiplexed++;
    barcode_pair_index = config->barcode_lookup[bcd][0];
    outfile = config->outputs[barcode_pair_index];
    bcd_len = config->barcodes[barcode_pair_index]->len1;
    config->barcodes[barcode_pair_index]->count++;
    if (seq1->seq.len <= bcd_len) {
        /* Don't write out seqs shorter than the barcode */
        return 0;
    }
    /* Bit of the ol' switcheroo. We keep the seq's char pointers, so we need
     * to switch them back to their orig. values, but don't want to copy.
     * Kludgy, I know. */
    seq1->seq.str += bcd_len;
    seq1->seq.len -= bcd_len;
    seq1->qual.str += bcd_len;
    seq1->qual.len -= bcd_len;
    ret = qes_seqfile_write(outfile->fwd_file, seq1);
    seq1->seq.str -= bcd_len;
    seq1->seq.len += bcd_len;
    seq1->qual.str -= bcd_len;
    seq1->qual.len += bcd_len;
    if (ret < 1) {
        fprintf(stderr,
                "[write_read_single] Error: writing to R1 file %s failed\n%s\n",
                outfile->fwd_file->qf->path,
                qes_file_error(outfile->fwd_file->qf));
        return 1;
    }
    /* And do the same with seq2, if we have one */
    if (seq2 == NULL) {
        return 0;
    }
    /* Only trim the reverse read when asked to, and only when it is long
       enough: trimming a shorter read would underflow its length. */
    if (config->trim_rev && seq2->seq.len > bcd_len) {
        rev_trim = bcd_len;
    }
    seq2->seq.str += rev_trim;
    seq2->seq.len -= rev_trim;
    seq2->qual.str += rev_trim;
    seq2->qual.len -= rev_trim;
    if (outfile->mode == READS_INTERLEAVED) {
        ret = qes_seqfile_write(outfile->fwd_file, seq2);
        if (ret < 1) {
            qes_log_format_fatal(
                    config->logger,
                    "process_file -- Writing to file %s failed\n%s\n",
                    outfile->fwd_file->qf->path,
                    qes_file_error(outfile->fwd_file->qf));
            retval = 1;
        }
    } else if (outfile->mode == READS_PAIRED) {
        ret = qes_seqfile_write(outfile->rev_file, seq2);
        if (ret < 1) {
            qes_log_format_fatal(
                    config->logger,
                    "process_file -- Writing to file %s failed\n%s\n",
                    outfile->rev_file->qf->path,
                    qes_file_error(outfile->rev_file->qf));
            retval = 1;
        }
    }
    /* Shift back on every path, so the caller never sees moved pointers */
    seq2->seq.str -= rev_trim;
    seq2->seq.len += rev_trim;
    seq2->qual.str -= rev_trim;
    seq2->qual.len += rev_trim;
    return retval;
}
/*
 * Demultiplex the input file(s) using single barcodes.
 *
 * config->infiles[0] is always opened; config->infiles[1] is opened as well
 * for paired input. Every read (or read pair) is handed to
 * process_read_pair_single(), and processing stops at the first read that
 * reports an error.
 *
 * Returns -1 on a bad config, an unopenable input file or an invalid input
 * mode, 1 if processing a read failed, and 0 on success.
 */
static int
process_file_single(struct axe_config *config)
{
    struct qes_seqfile *fwdsf = NULL;
    struct qes_seqfile *revsf = NULL;
    /* Sticky: once a read fails we must not let a later success hide it */
    int have_error = 0;
    int retval = -1;

    if (!axe_config_ok(config)) {
        return -1;
    }
    fwdsf = qes_seqfile_create(config->infiles[0], "r");
    if (fwdsf == NULL) {
        qes_log_format_fatal(config->logger,
                "process_file -- Couldn't open seqfile %s\n",
                config->infiles[0]);
        goto exit;
    }
    switch(config->in_mode) {
        case READS_SINGLE:
            goto single;
        case READS_INTERLEAVED:
            goto interleaved;
        case READS_PAIRED:
            revsf = qes_seqfile_create(config->infiles[1], "r");
            if (revsf == NULL) {
                qes_log_format_fatal(config->logger,
                        "process_file -- Couldn't open seqfile %s\n",
                        config->infiles[1]);
                goto exit;
            }
            goto paired;
        case READS_UNKNOWN:
        default:
            qes_log_format_fatal(config->logger,
                    "process_file_single -- Bad infile mode %u\n",
                    config->in_mode);
            goto exit;
    }
single:
    QES_SEQFILE_ITER_SINGLE_BEGIN(fwdsf, seq, seqlen) {
        if (process_read_pair_single(config, seq, NULL) != 0) {
            have_error = 1;
            break;
        }
    }
    QES_SEQFILE_ITER_SINGLE_END(seq);
    retval = have_error ? 1 : 0;
    goto exit;
interleaved:
    QES_SEQFILE_ITER_INTERLEAVED_BEGIN(fwdsf, seq1, seq2, seqlen1, seqlen2) {
        if (process_read_pair_single(config, seq1, seq2) != 0) {
            have_error = 1;
            break;
        }
    }
    QES_SEQFILE_ITER_INTERLEAVED_END(seq1, seq2);
    retval = have_error ? 1 : 0;
    goto exit;
paired:
    QES_SEQFILE_ITER_PAIRED_BEGIN(fwdsf, revsf, seq1, seq2, seqlen1, seqlen2) {
        if (process_read_pair_single(config, seq1, seq2) != 0) {
            have_error = 1;
            break;
        }
    }
    QES_SEQFILE_ITER_PAIRED_END(seq1, seq2);
    retval = have_error ? 1 : 0;
    goto exit;
exit:
    qes_seqfile_destroy(fwdsf);
    qes_seqfile_destroy(revsf);
    return retval;
}
/*
 * Demultiplex one read pair using combinatorial barcodes.
 *
 * seq1 is matched against the forward trie and seq2 against the reverse
 * trie. The pair belongs to a sample only if both reads match and that
 * combination has an entry in config->barcode_lookup; otherwise both reads
 * are written to config->unknown_output and counted as failed.
 *
 * Returns 0 on success (matched or not) and 1 if writing a matched pair
 * failed.
 */
static int
process_read_pair_combo(struct axe_config *config, struct qes_seq *seq1,
                        struct qes_seq *seq2)
{
    /* Stays negative unless both reads match a known combination */
    ssize_t barcode_pair_index = -1;
    ssize_t bcd1 = -1;
    ssize_t bcd2 = -1;
    int r1_ret = 0;
    int r2_ret = 0;
    size_t bcd1_len = 0;
    size_t bcd2_len = 0;
    struct axe_output *outfile = NULL;

    r1_ret = axe_match_read(config, &bcd1, config->fwd_trie, seq1);
    r2_ret = axe_match_read(config, &bcd2, config->rev_trie, seq2);
    increment_reads_print_progress(config);
    /* Only index the lookup table with values that are inside it */
    if (r1_ret == 0 && r2_ret == 0 &&
            bcd1 >= 0 && (size_t)bcd1 < config->n_barcodes_1 &&
            bcd2 >= 0 && (size_t)bcd2 < config->n_barcodes_2) {
        barcode_pair_index = config->barcode_lookup[bcd1][bcd2];
    }
    if (barcode_pair_index < 0) {
        /* No match, or a pairing of barcodes that isn't a sample */
        qes_seqfile_write(config->unknown_output->fwd_file, seq1);
        if (config->out_mode == READS_INTERLEAVED) {
            qes_seqfile_write(config->unknown_output->fwd_file, seq2);
        } else {
            qes_seqfile_write(config->unknown_output->rev_file, seq2);
        }
        config->reads_failed++;
        return 0;
    }
    /* Found a match */
    config->reads_demultiplexed++;
    outfile = config->outputs[barcode_pair_index];
    bcd1_len = config->barcodes[barcode_pair_index]->len1;
    bcd2_len = config->barcodes[barcode_pair_index]->len2;
    config->barcodes[barcode_pair_index]->count++;
    return write_barcoded_read_combo(outfile, seq1, seq2, bcd1_len,
                                     bcd2_len);
}
/*
 * Demultiplex the input file(s) using combinatorial barcodes.
 *
 * Only interleaved and paired input are accepted, as both reads of a pair
 * are needed. Every read pair is handed to process_read_pair_combo(), and
 * processing stops at the first pair that reports an error.
 *
 * Returns -1 if `config` fails axe_config_ok(), 1 on any failure (an
 * unopenable input file, an invalid input mode, a failed pair) and 0 on
 * success.
 */
static int
process_file_combo(struct axe_config *config)
{
    struct qes_seqfile *fwdsf = NULL;
    struct qes_seqfile *revsf = NULL;
    int have_error = 0;

    if (!axe_config_ok(config)) {
        return -1;
    }
    fwdsf = qes_seqfile_create(config->infiles[0], "r");
    if (fwdsf == NULL) {
        qes_log_format_fatal(config->logger,
                "process_file -- Couldn't open seqfile %s\n",
                config->infiles[0]);
        have_error = 1;
        goto exit;
    }
    switch(config->in_mode) {
        case READS_INTERLEAVED:
            goto interleaved;
        case READS_PAIRED:
            revsf = qes_seqfile_create(config->infiles[1], "r");
            if (revsf == NULL) {
                /* Name the file that actually failed to open */
                qes_log_format_fatal(config->logger,
                        "process_file -- Couldn't open seqfile %s\n",
                        config->infiles[1]);
                have_error = 1;
                goto exit;
            }
            goto paired;
        case READS_SINGLE:
        case READS_UNKNOWN:
        default:
            qes_log_format_fatal(config->logger,
                    "process_file_combo -- Bad infile mode %u\n",
                    config->in_mode);
            have_error = 1;
            goto exit;
    }
interleaved:
    QES_SEQFILE_ITER_INTERLEAVED_BEGIN(fwdsf, seq1, seq2, seqlen1, seqlen2)
        if (process_read_pair_combo(config, seq1, seq2)) {
            have_error = 1;
            break;
        }
    QES_SEQFILE_ITER_INTERLEAVED_END(seq1, seq2)
    goto exit;
paired:
    QES_SEQFILE_ITER_PAIRED_BEGIN(fwdsf, revsf, seq1, seq2, seqlen1, seqlen2)
        if (process_read_pair_combo(config, seq1, seq2)) {
            have_error = 1;
            break;
        }
    QES_SEQFILE_ITER_PAIRED_END(seq1, seq2)
    goto exit;
exit:
    /* Single cleanup path for success and failure alike */
    qes_seqfile_destroy(fwdsf);
    qes_seqfile_destroy(revsf);
    return have_error ? 1 : 0;
}
/*
 * Run the demultiplexing pass selected by config->match_combo, recording in
 * config->time_taken the elapsed clock() time in seconds. Start and finish
 * messages are logged unless config->verbosity is negative.
 *
 * Returns -1 if `config` fails axe_config_ok(), otherwise whatever the
 * selected pass returns.
 */
int
axe_process_file(struct axe_config *config)
{
    clock_t began;
    int status;

    if (!axe_config_ok(config)) {
        return -1;
    }
    began = clock();
    if (config->verbosity >= 0) {
        axe_format_bold(config->logger,
                        "process_file -- (%s) Starting demultiplexing\n",
                        nowstr());
    }
    status = config->match_combo ? process_file_combo(config)
                                 : process_file_single(config);
    config->time_taken = (float)(clock() - began) / CLOCKS_PER_SEC;
    if (config->verbosity >= 0) {
        /* Jump to new line so we don't clobber the progress bar */
        fprintf(stderr, "\n");
        axe_format_bold(config->logger,
                        "process_file -- (%s) Finished demultiplexing\n",
                        nowstr());
    }
    return status;
}
/*
 * Step `choices` (an array of `elem` values, each in 0..len-1) to the next
 * tuple, counting like an odometer whose last position moves fastest.
 *
 * When `at_start` is non-zero nothing is changed, so that the caller's
 * initial tuple (e.g. all zeros) is visited as well.
 *
 * Returns 1 if `choices` holds a tuple to use, 0 once every tuple has been
 * visited (`choices` is then reset to all zeros), and -1 if `choices` is
 * NULL or `len` is less than `elem`. Since -1 is also "true", loop on
 * `== 1` rather than on truthiness.
 */
int
product(int64_t len, int64_t elem, uintptr_t *choices, int at_start)
{
    ssize_t pos;
    ssize_t fill;

    if (choices == NULL || elem > len) {
        return -1;
    }
    if (at_start) {
        /* [0, 0, ..., 0] is a valid set */
        return 1;
    }
    /* Find the right-most position that can still be incremented */
    for (pos = elem - 1; pos >= 0; pos--) {
        if (choices[pos] >= (uintptr_t)(len - 1)) {
            continue;
        }
        choices[pos] += 1;
        /* Everything to its right wraps round to zero */
        for (fill = pos + 1; fill < elem; fill++) {
            choices[fill] = 0;
        }
        return 1;
    }
    /* Every position was at its maximum: we're done, so start over */
    for (fill = 0; fill < elem; fill++) {
        choices[fill] = 0;
    }
    return 0;
}
/*
 * Generate the substitution mutants of a DNA string.
 *
 * For every choice of `dist` positions in the first `len` characters of
 * `str`, and every assignment of the letters A, C, G, T to those positions,
 * one copy of the string with those substitutions is produced. Copies that
 * are identical to the original are left out unless `keep_original` is
 * non-zero.
 *
 * On success, returns a heap array of heap strings and stores the number of
 * strings in `*n_results_o`; the caller releases every string and then the
 * array. Returns NULL, with `*n_results_o` set to 0 where possible, if
 * `n_results_o` or `str` is NULL, or `len` or `dist` is less than 1.
 */
char **
hamming_mutate_dna(size_t *n_results_o, const char *str, size_t len,
                   unsigned int dist, int keep_original)
{
    const char alphabet[] = "ACGT";
    const size_t n_letters = 4;
    char *candidate = NULL;
    char **result = NULL;
    size_t results = 0;
    size_t results_alloced = 64;
    size_t iii;
    uintptr_t *alphabet_indicies = NULL;
    int alpha_ret = 0;
    gsl_combination *mut_idx_comb = NULL;

    if (n_results_o == NULL) {
        return NULL;
    }
    /* Never leave the caller's count uninitialised, even on failure */
    *n_results_o = 0;
    if (str == NULL || len < 1 || dist < 1) {
        return NULL;
    }
    result = qes_malloc(results_alloced * sizeof(*result));
    alphabet_indicies = qes_calloc(dist, sizeof(*alphabet_indicies));
    mut_idx_comb = gsl_combination_calloc(len, dist);
    /* Outer loop: each set of positions. Inner loop: each set of letters. */
    do {
        /* alpha_ret is 0 on entry and after each exhausted letter cycle, so
           !alpha_ret tells product() when it is starting a fresh cycle. */
        while ((alpha_ret = product(n_letters, dist, alphabet_indicies,
                                    !alpha_ret)) == 1) {
            candidate = strndup(str, len);
            for (iii = 0; iii < dist; iii++) {
                size_t mut_idx = gsl_combination_get(mut_idx_comb, iii);
                candidate[mut_idx] = alphabet[alphabet_indicies[iii]];
            }
            if (!keep_original && strncmp(str, candidate, len) == 0) {
                /* Every replacement equalled the original base */
                qes_free(candidate);
                continue;
            }
            if (results + 1 > results_alloced) {
                results_alloced = qes_roundupz(results_alloced);
                result = qes_realloc(result,
                                     results_alloced * sizeof(*result));
            }
            /* Hand the string over to the result array as-is */
            result[results++] = candidate;
            candidate = NULL;
        }
    } while (gsl_combination_next(mut_idx_comb) == GSL_SUCCESS);
    gsl_combination_free(mut_idx_comb);
    qes_free(alphabet_indicies);
    *n_results_o = results;
    return result;
}
/*
 * Allocate an empty trie whose alphabet is the five letters A, C, G, T
 * and N.
 *
 * Returns the new trie, or NULL if the alphabet map or the underlying trie
 * could not be created.
 */
struct axe_trie *
axe_trie_create(void)
{
    const char letters[] = "ACGTN";
    struct axe_trie *wrapper = NULL;
    AlphaMap *alphabet = alpha_map_new();
    size_t pos;

    /* Make trie AlphaMap */
    if (alphabet == NULL) {
        return NULL;
    }
    for (pos = 0; letters[pos] != '\0'; pos++) {
        /* Each letter is added as a range of one character */
        if (alpha_map_add_range(alphabet, letters[pos], letters[pos]) != 0) {
            fprintf(stderr, "[trie_create] Failed to add char %c to alphamap\n",
                    letters[pos]);
            alpha_map_free(alphabet);
            return NULL;
        }
    }
    wrapper = qes_calloc(1, sizeof(*wrapper));
    wrapper->trie = trie_new(alphabet);
    /* The map is released here whether or not the trie was created */
    alpha_map_free(alphabet);
    if (wrapper->trie == NULL) {
        qes_free(wrapper);
        return NULL;
    }
    return wrapper;
}
/*
 * Release a trie made by axe_trie_create(), including the underlying trie
 * if there is one. A NULL `trie` is ignored.
 */
void
axe_trie_destroy_(struct axe_trie *trie)
{
    if (trie == NULL) {
        return;
    }
    /* trie_free doesn't check for null, so we better */
    if (trie->trie != NULL) {
        trie_free(trie->trie);
    }
    qes_free(trie);
}
/*
 * Look `str` up in `trie`, storing its value in `*data` if it is present.
 *
 * Returns the result of trie_retrieve(), or -1 if `trie` fails
 * axe_trie_ok() or `str` is NULL.
 */
inline int
axe_trie_get(struct axe_trie *trie, const char *str, intptr_t *data)
{
    if (str != NULL && axe_trie_ok(trie)) {
        return trie_retrieve(trie->trie, str, data);
    }
    return -1;
}
/*
 * Remove `str` from `trie`.
 *
 * Returns the result of trie_delete(), or -1 if `trie` fails axe_trie_ok()
 * or `str` is NULL.
 */
inline int
axe_trie_delete(struct axe_trie *trie, const char *str)
{
    if (str != NULL && axe_trie_ok(trie)) {
        return trie_delete(trie->trie, str);
    }
    return -1;
}
/*
 * Store `data` under `str` in `trie`, via trie_store_if_absent().
 *
 * Returns 0 if the entry was stored, 1 if it was not, and -1 if `trie`
 * fails axe_trie_ok() or `str` is NULL.
 */
inline int
axe_trie_add(struct axe_trie *trie, const char *str, intptr_t data)
{
    if (str == NULL || !axe_trie_ok(trie)) {
        return -1;
    }
    return trie_store_if_absent(trie->trie, str, data) ? 0 : 1;
}
/*
 * Find the longest barcode in `trie` that the read `seq` starts with.
 *
 * The read is followed through the trie from its first base, for as long as
 * the next base leads somewhere and the read has bases left. The last point
 * at which a complete barcode had been consumed gives the match. The walk
 * never skips a base, so a barcode is only ever matched at the very start
 * of the read.
 *
 * On a match `*value` receives the value stored with the barcode; otherwise
 * it is set to -1.
 *
 * Returns 0 on a match, 1 if there is no match, and -1 on bad arguments or
 * if the trie has no usable root.
 */
inline int
axe_match_read (struct axe_config *config, ssize_t *value,
                struct axe_trie *trie, const struct qes_seq *seq)
{
    TrieState *trie_iter = NULL;
    TrieState *last_good_state = NULL;
    size_t seq_pos = 0;

    /* value is set to -1 on anything bad happening including failed lookup */
    if (value == NULL || !axe_trie_ok(trie) || !qes_seq_ok(seq)) {
        return -1;
    }
    /* Set *value here, then we just don't update it on error */
    *value = -1;
    if (seq->seq.len < trie->min_len) {
        return 1;
    }
    /* Grab tree root iter, and check it. */
    trie_iter = trie_root(trie->trie);
    if (trie_iter == NULL) {
        qes_log_message_fatal(config->logger,
                              "match_read -- trie_root() returned NULL!\n");
        return -1;
    }
    /* Consume seq until we can't. Each base is checked before it is walked,
       the first one included, and we stop at the end of the read rather
       than feeding the string terminator to the trie. */
    while (seq_pos < seq->seq.len &&
            trie_state_is_walkable(trie_iter, seq->seq.str[seq_pos])) {
        trie_state_walk(trie_iter, seq->seq.str[seq_pos]);
        seq_pos++;
        if (trie_state_is_terminal(trie_iter)) {
            /* A whole barcode ends here: remember it, but keep going in
               case a longer barcode also matches. */
            if (last_good_state != NULL) {
                trie_state_free(last_good_state);
            }
            last_good_state = trie_state_clone(trie_iter);
        }
    }
    trie_state_free(trie_iter);
    if (last_good_state == NULL) {
        return 1;
    }
    /* Step onto the terminator to reach the stored value */
    trie_state_walk(last_good_state, '\0');
    *value = (ssize_t) trie_state_get_data(last_good_state);
    trie_state_free(last_good_state);
    return 0;
}
int
axe_write_table(const struct axe_config *config)
{
FILE *tab_fp = NULL;
struct axe_barcode *this_bcd = NULL;
size_t iii = 0;
int res = 0;
if (!axe_config_ok(config)) {
return -1;
}
if (config->table_file == NULL) {
/* we always call this function in the main loop, so we bail out here
if we don't have a file to write it to. */
return 0;
}
tab_fp = fopen(config->table_file, "w");
if (tab_fp == NULL) {
qes_log_format_fatal(config->logger, "write_table -- ERROR: Could not open %s\n%s\n",
config->table_file, strerror(errno));
return 1;
}
if (config->match_combo) {
fprintf(tab_fp, "R1Barcode\tR2Barcode\tSample\tCount\n");
} else {
fprintf(tab_fp, "Barcode\tSample\tCount\n");
}
for (iii = 0; iii < config->n_barcode_pairs; iii++) {
this_bcd = config->barcodes[iii];
if (config->match_combo) {
fprintf(tab_fp, "%s\t%s\t%s\t%" PRIu64 "\n", this_bcd->seq1,
this_bcd->seq2, this_bcd->id, this_bcd->count);
} else {
fprintf(tab_fp, "%s\t%s\t%" PRIu64 "\n", this_bcd->seq1,
this_bcd->id, this_bcd->count);
}
}
if (config->match_combo) {
fprintf(tab_fp, "N\tN\tNo Barcode\t%" PRIu64 "\n",
config->reads_failed);
} else {
fprintf(tab_fp, "N\tNo Barcode\t%" PRIu64 "\n", config->reads_failed);
}
res = fclose(tab_fp);
if (res != 0) {
qes_log_format_error(config->logger,
"[write_table] Couldn't close tab file %s\n%s\n",
config->table_file, strerror(errno));
return 1;
}
return 0;
}
/*
 * Log a summary of the run: reads processed, time taken and throughput,
 * reads with a valid barcode, and reads that could not be demultiplexed.
 * Nothing is printed when config->verbosity is negative.
 *
 * The throughput is reported as 0 when no time was recorded, and the failure
 * percentage as 0 when no reads were processed, rather than dividing by
 * zero.
 *
 * Returns -1 if `config` fails axe_config_ok(), otherwise 0.
 */
int
axe_print_summary(const struct axe_config *config)
{
    const char *tmp;
    float kreads_per_sec = 0.0f;
    double failed_percent = 0.0;
#define million(r) ((float)((r) / 1000000.0))

    if (!axe_config_ok(config)) {
        return -1;
    }
    if (config->verbosity < 0) {
        /* Say nothing if we're being quiet */
        return 0;
    }
    axe_message_bold(config->logger, "\nRun Summary:\n");
    if (config->verbosity > 1) {
        qes_log_message_debug(config->logger,
                              "Being verbose (not that you'll notice)\n");
    }
    tmp = config->out_mode == READS_SINGLE ? "reads" : "read pairs";
    if (config->time_taken > 0.0f) {
        kreads_per_sec = (float)(config->reads_processed / 1000) /
                         config->time_taken;
    }
    if (config->reads_processed > 0) {
        failed_percent = (float)config->reads_failed /
                         (float)(config->reads_processed) * 100.0;
    }
    axe_format_bold(config->logger,
                    "Processed %.2fM %s in %0.1f seconds (%0.1fK %s/sec)\n",
                    million(config->reads_processed), tmp, config->time_taken,
                    kreads_per_sec, tmp);
    axe_format_bold(config->logger,
                    "%.2fM %s contained valid barcodes\n",
                    million(config->reads_demultiplexed), tmp);
    axe_format_bold(config->logger,
                    "%.2fM %s could not be demultiplexed (%0.1f%%)\n",
                    million(config->reads_failed), tmp,
                    failed_percent);
#undef million
    return 0;
}
axe-0.3.1/src/axe.h 0000664 0000000 0000000 00000024154 12635670016 0014021 0 ustar 00root root 0000000 0000000 /*
* ============================================================================
*
* Filename: axe.h
* Description: Demultiplex reads by 5' barcodes
* Copyright: 2014-2015 Kevin Murray
* License: GNU GPL v3+
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
*
* ============================================================================
*/
#ifndef AXE_H
#define AXE_H
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "datrie/trie.h"
#include "datrie/alpha-map.h"
#include "axe_config.h"
/* General rules:
* Most functions are declared as `int X(...);`. These functions return:
* -1 on parameter error (NULLs, bad values etc)
* 0 on success
* 1 on failure
* Thus, one can check success with ret = X(...); if (ret != 0) {panic();}
* If a function returns a pointer, NULL is the error value.
*/
/* How the reads of an input or output are laid out across files. */
enum read_mode {
    /* Not set; rejected wherever a mode is required */
    READS_UNKNOWN = 0,
    /* One file of single reads */
    READS_SINGLE = 1,
    /* Two files: forward reads in one, reverse reads in the other */
    READS_PAIRED = 2,
    /* One file holding both reads of each pair */
    READS_INTERLEAVED = 3,
};
/* The output file(s) that the reads of one sample are written to. */
struct axe_output {
    /* Always required (see axe_output_ok) */
    struct qes_seqfile *fwd_file;
    /* Required only when mode is READS_PAIRED (see axe_output_ok) */
    struct qes_seqfile *rev_file;
    /* Layout of this output; READS_UNKNOWN is not valid here */
    enum read_mode mode;
};
/* A datrie trie plus the bookkeeping axe keeps alongside it. */
struct axe_trie {
    Trie *trie; /* From datrie.h */
    /* NOTE(review): presumably the mismatch level this trie was built for;
       confirm against the code that sets it */
    int mismatch_level;
    /* NOTE(review): presumably the longest and shortest barcode lengths;
       confirm. axe_trie_ok() requires min_len <= max_len, and reads shorter
       than min_len are never matched. */
    size_t max_len;
    size_t min_len;
};
/* One line of the barcode file: a sample and its barcode sequence(s). */
struct axe_barcode {
    /* First-read barcode sequence; always required */
    char *seq1;
    /* Second-read barcode sequence; required only for combinatorial
       matching (see axe_barcode_ok_combo) */
    char *seq2;
    /* Sample identifier, used in output file names and the count table */
    char *id;
    /* Lengths of seq1, seq2 and id respectively */
    size_t len1;
    size_t len2;
    size_t idlen;
    /* Number of reads (or read pairs) assigned to this barcode */
    uint64_t count;
};
/* Everything axe needs for one run: settings, loaded barcodes, open
   outputs, lookup structures and running totals. */
struct axe_config {
    /* NOTE(review): presumably the path of the barcode file; confirm */
    char *barcode_file;
    /* Path the count table is written to; NULL means no table is written */
    char *table_file;
    /* Input paths. [0] is always opened; [1] only for READS_PAIRED input */
    char *infiles[2];
    /* Output name prefixes. [0] is used for the forward (or only) file;
       [1] for the reverse file of paired output */
    char *out_prefixes[2];
    /* The barcodes, n_barcode_pairs of them */
    struct axe_barcode **barcodes;
    /* One output per barcode, indexed like `barcodes` */
    struct axe_output **outputs;
    /* Array of output files. Access by bcd_lookup[1st_bcd_idx][2nd_bcd_idx]
       Values will be 0 <= x < n_barcode_pairs. barcodes or outputs can then
       be indexed w/ this number. With combinatorial matching, a pairing
       that is not a sample holds -1 instead. */
    ssize_t **barcode_lookup;
    /* NOTE(review): not used in the code visible here; purpose to be
       confirmed */
    size_t *mismatch_counts;
    size_t n_barcodes_1; /* Number of first read barcodes */
    size_t n_barcodes_2; /* Number of second read barcodes */
    /* Number of entries in `barcodes` and in `outputs` */
    size_t n_barcode_pairs;
    struct axe_output *unknown_output; /* output for unknown files */
    /* Trie of first-read barcodes */
    struct axe_trie *fwd_trie;
    /* Trie of second-read barcodes; only created for combinatorial
       matching */
    struct axe_trie *rev_trie;
    struct qes_logger *logger;
    enum read_mode in_mode;
    enum read_mode out_mode;
    /* 1 to 9 gives compressed ("fastq.gz") output; anything else gives
       plain output */
    int out_compress_level;
    /* Maximum number of substitutions tolerated when matching a barcode */
    size_t mismatches;
    /* Running totals: processed = demultiplexed + failed */
    uint64_t reads_processed;
    uint64_t reads_demultiplexed;
    uint64_t reads_failed;
    /* Duration of the demultiplexing pass in seconds, measured with
       clock() */
    float time_taken;
    /* Below zero: quiet. Above zero: extra progress messages. */
    int verbosity;
    /* NOTE(review): these are signed one-bit fields, which can hold only 0
       and a single non-zero value; test them for truth, never `== 1`. */
    int have_cli_opts :1; /* Set to 1 once CLI is parsed */
    int match_combo :1; /* Match using combinatorial strategy */
    int permissive :1; /* Don't error on mutated bcd conflict */
    int trim_rev :1; /* Trim rev read same as fwd read */
    int debug :1; /* Enable debug mode */
};
/* NOTE(review): presumably counts calls to axe_formatter(); confirm against
   its definition */
extern unsigned int format_call_number;

/* Log entry formatter. NOTE(review): presumably installed on the logger;
   ownership of the returned string to be confirmed against the definition */
char *
axe_formatter(struct qes_log_entry *entry);

/* Axe-specific log level for progress lines. The _format_ form takes a
   printf-style format and needs at least one argument after it; the
   _message_ form takes a fixed string. */
#define AXE_LOG_PROGRESS 11
#define axe_format_progress(log, fmt, ...) \
    qes_log_format(log, AXE_LOG_PROGRESS, fmt, __VA_ARGS__)
#define axe_message_progress(log, msg) \
    qes_log_message(log, AXE_LOG_PROGRESS, msg)

/* Axe-specific log level for emphasised messages, in the same two forms. */
#define AXE_LOG_BOLD 12
#define axe_format_bold(log, fmt, ...) \
    qes_log_format(log, AXE_LOG_BOLD, fmt, __VA_ARGS__)
#define axe_message_bold(log, msg) \
    qes_log_message(log, AXE_LOG_BOLD, msg)
/* Sanity check for a config pointer: 1 when it is non-NULL, 0 otherwise. */
static inline int
axe_config_ok(const struct axe_config *config)
{
    return (config != NULL) ? 1 : 0;
}
/* Sanity check for a trie wrapper: returns 1 only when the wrapper and its
 * underlying datrie are non-NULL and min_len does not exceed max_len;
 * returns 0 otherwise. */
static inline int
axe_trie_ok(const struct axe_trie *trie)
{
    int valid = 0;

    if (trie != NULL && trie->trie != NULL) {
        valid = (trie->min_len <= trie->max_len);
    }
    return valid;
}
/* Sanity check for a single-ended barcode: returns 1 only when the barcode
 * is non-NULL and has both a non-empty first sequence and a non-empty id;
 * returns 0 otherwise. seq2/len2 are not inspected. */
static inline int
axe_barcode_ok(const struct axe_barcode *barcode)
{
    int has_seq1, has_id;

    if (barcode == NULL) {
        return 0;
    }
    has_seq1 = (barcode->seq1 != NULL && barcode->len1 != 0);
    has_id = (barcode->id != NULL && barcode->idlen != 0);
    return (has_seq1 && has_id) ? 1 : 0;
}
/* Sanity check for a combinatorial barcode: like axe_barcode_ok(), but the
 * second sequence (seq2/len2) must be present and non-empty as well.
 * Returns 1 when valid, 0 otherwise. */
static inline int
axe_barcode_ok_combo(const struct axe_barcode *barcode)
{
    int has_seq1, has_seq2, has_id;

    if (barcode == NULL) {
        return 0;
    }
    has_seq1 = (barcode->seq1 != NULL && barcode->len1 != 0);
    has_seq2 = (barcode->seq2 != NULL && barcode->len2 != 0);
    has_id = (barcode->id != NULL && barcode->idlen != 0);
    return (has_seq1 && has_seq2 && has_id) ? 1 : 0;
}
/* Sanity check for an output: returns 1 when the output is non-NULL, its
 * mode is known, the forward file is open and -- for READS_PAIRED only --
 * the reverse file is open too. Returns 0 otherwise. */
static inline int
axe_output_ok(const struct axe_output *output)
{
    if (output == NULL || output->mode == READS_UNKNOWN) {
        return 0;
    }
    if (output->fwd_file == NULL) {
        return 0;
    }
    /* Only paired output needs the second file. */
    if (output->mode == READS_PAIRED) {
        return (output->rev_file != NULL) ? 1 : 0;
    }
    return 1;
}
/*=== FUNCTION ============================================================*
Name: axe_config_create
Parameters: void
Description: Create a struct axe_config on the heap and initialise members
to empty/null values.
Returns: struct axe_config *: A valid, empty struct axe_config, or NULL
on any error.
*===========================================================================*/
struct axe_config *axe_config_create(void);
/*=== FUNCTION ============================================================*
Name: axe_config_destroy
Parameters: struct axe_config *: config struct on heap to destroy.
Description: Destroy a ``struct axe_config`` on the heap, and set its
pointer variable to NULL. ``axe_config_destroy_`` is the
underlying function; ``axe_config_destroy`` is a macro that
calls it and then assigns NULL to its argument, so the
argument must be a modifiable lvalue and is evaluated twice.
Returns: void
*===========================================================================*/
void axe_config_destroy_(struct axe_config *config);
/* STMT_BEGIN / STMT_END are defined outside this header section. */
#define axe_config_destroy(cfg) STMT_BEGIN \
axe_config_destroy_(cfg); \
cfg = NULL; \
STMT_END
/*=== FUNCTION ============================================================*
Name: axe_output_create
Parameters: const char *fwd_fpath: Forwards/interleaved read filepath
const char *rev_fpath: Reverse read filepath
enum read_mode mode: Output mode
const char *fp_mode: qes_fopen() mode specifier. See
/usr/include/zlib.h for valid values.
Description: Creates and opens file members of a struct axe_output
Returns: struct axe_output *: A valid struct axe_output, or NULL on
failure of any kind
*===========================================================================*/
struct axe_output *axe_output_create(const char *fwd_fpath,
const char *rev_fpath, enum read_mode mode, const char *fp_mode);
/*=== FUNCTION ============================================================*
Name: axe_output_destroy
Parameters: struct axe_output *: output struct on heap to destroy.
Description: Destroy a ``struct axe_output`` on the heap, and set its
pointer variable to NULL. ``axe_output_destroy_`` is the
underlying function; ``axe_output_destroy`` is a macro that
calls it and then assigns NULL to its argument, so the
argument must be a modifiable lvalue and is evaluated twice.
Returns: void
*===========================================================================*/
void axe_output_destroy_(struct axe_output *output);
/* STMT_BEGIN / STMT_END are defined outside this header section. */
#define axe_output_destroy(out) STMT_BEGIN \
axe_output_destroy_(out); \
out = NULL; \
STMT_END
/* Allocate a new trie wrapper. NOTE(review): presumably returns NULL on
 * failure, per the general rules for pointer-returning functions -- confirm. */
struct axe_trie *axe_trie_create(void);
/* Look up / insert / remove the key `str` in `trie`; `data` is the value
 * associated with the key. NOTE(review): return codes presumably follow the
 * -1 / 0 / 1 convention described at the top of this header -- confirm. */
extern int axe_trie_get(struct axe_trie *trie, const char *str,
intptr_t *data);
extern int axe_trie_add(struct axe_trie *trie, const char *str,
intptr_t data);
extern int axe_trie_delete(struct axe_trie *trie, const char *str);
/*=== FUNCTION ============================================================*
Name: axe_trie_destroy
Parameters: struct axe_trie *: trie struct on heap to destroy.
Description: Destroy a ``struct axe_trie`` on the heap, and set its
pointer variable to NULL. ``axe_trie_destroy_`` is the
underlying function; ``axe_trie_destroy`` is a macro that
calls it and then assigns NULL to its argument, so the
argument must be a modifiable lvalue and is evaluated twice.
Returns: void
*===========================================================================*/
void axe_trie_destroy_(struct axe_trie *trie);
#define axe_trie_destroy(trie) STMT_BEGIN \
axe_trie_destroy_(trie); \
trie = NULL; \
STMT_END
/* Allocate a new barcode record. NOTE(review): presumably NULL on failure
 * -- confirm against the definition. */
struct axe_barcode *axe_barcode_create(void);
/* Underlying destructor; the axe_barcode_destroy() macro below calls it and
 * then assigns NULL to its (lvalue) argument. */
void axe_barcode_destroy_(struct axe_barcode *barcode);
#define axe_barcode_destroy(barcode) STMT_BEGIN \
axe_barcode_destroy_(barcode); \
barcode = NULL; \
STMT_END
/* This is the processing pipeline. These functions should be run in this
order. Each takes the shared struct axe_config and returns an int status
(see the "General rules" comment near the top of this header). */
int axe_read_barcodes(struct axe_config *config);
int axe_setup_barcode_lookup(struct axe_config *config);
int axe_make_tries(struct axe_config *config);
int axe_load_tries(struct axe_config *config);
int axe_make_outputs(struct axe_config *config);
int axe_process_file(struct axe_config *config);
/* The two reporting steps take the config as const. */
int axe_write_table(const struct axe_config *config);
int axe_print_summary(const struct axe_config *config);
/* Libraries or inner functions */
/* NOTE(review): presumably matches `seq` against `trie` and stores the hit
 * in *value -- confirm against the definition. */
extern int axe_match_read(struct axe_config *config, intptr_t *value,
struct axe_trie *trie, const struct qes_seq *seq);
/* NOTE(review): looks like an iterator over the cartesian product of `elem`
 * choices taken `len` times, writing into `choices` -- confirm. */
int product(int64_t len, int64_t elem, uintptr_t *choices, int at_start);
/* NOTE(review): presumably returns the DNA strings within Hamming distance
 * `dist` of `str`, with the count stored in *n_results_o -- confirm
 * ownership of the returned array against the definition. */
char **hamming_mutate_dna(size_t *n_results_o, const char *str, size_t len,
unsigned int dist, int keep_original);
/* Shared buffer, defined in another translation unit, that nowstr() fills. */
extern char _time_now[];

/* Format the current local time as "HH:MM:SS" into the shared _time_now
 * buffer and return a pointer to that buffer. Every call overwrites the
 * same storage, so the result is only valid until the next call. */
static inline const char *
nowstr(void)
{
    time_t now = time(NULL);
    const struct tm *local = localtime(&now);

    strftime(_time_now, 10, "%H:%M:%S", local);
    return _time_now;
}
#endif /* AXE_H */
axe-0.3.1/src/axe_config.h.in 0000664 0000000 0000000 00000002134 12635670016 0015745 0 ustar 00root root 0000000 0000000 /*
* ============================================================================
*
* Filename: axe_config.h.in
* Description: Pull in build system definitions
* Copyright: 2014-2015 Kevin Murray
* License: GNU GPL v3+
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
* ============================================================================
*/
#ifndef AXE_CONFIG_H
#define AXE_CONFIG_H
/* "${AXE_VERSION}" is a template placeholder: this file is a ".in" template
 * and the build system is expected to substitute the real version string
 * when generating axe_config.h (NOTE(review): presumably via CMake's
 * configure_file -- confirm in CMakeLists.txt). */
#define AXE_VERSION "${AXE_VERSION}"
#endif /* AXE_CONFIG_H */
axe-0.3.1/src/datrie/ 0000775 0000000 0000000 00000000000 12635670016 0014335 5 ustar 00root root 0000000 0000000 axe-0.3.1/src/datrie/alpha-map-private.h 0000664 0000000 0000000 00000003335 12635670016 0020022 0 ustar 00root root 0000000 0000000 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* libdatrie - Double-Array Trie Library
* Copyright (C) 2006 Theppitak Karoonboonyanan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* alpha-map-private.h - private APIs for alpha-map
* Created: 2008-12-04
* Author: Theppitak Karoonboonyanan
*/
#ifndef __ALPHA_MAP_PRIVATE_H
#define __ALPHA_MAP_PRIVATE_H
#include
#include "alpha-map.h"
/* Map an alphabet character to its trie code: 0 maps to 0, a character not
 * covered by any range yields TRIE_INDEX_MAX. */
TrieIndex alpha_map_char_to_trie (const AlphaMap *alpha_map,
AlphaChar ac);
/* Inverse mapping: 0 maps to 0, a code beyond all ranges yields
 * ALPHA_CHAR_ERROR. */
AlphaChar alpha_map_trie_to_char (const AlphaMap *alpha_map,
TrieChar tc);
/* Convert a 0-terminated AlphaChar string into a newly malloc()ed
 * 0-terminated TrieChar string; NULL on allocation failure or when any
 * character is outside the map. */
TrieChar * alpha_map_char_to_trie_str (const AlphaMap *alpha_map,
const AlphaChar *str);
/* Convert a 0-terminated TrieChar string into a newly malloc()ed
 * 0-terminated AlphaChar string; NULL on allocation failure. */
AlphaChar * alpha_map_trie_to_char_str (const AlphaMap *alpha_map,
const TrieChar *str);
#endif /* __ALPHA_MAP_PRIVATE_H */
/*
vi:ts=4:ai:expandtab
*/
axe-0.3.1/src/datrie/alpha-map.c 0000664 0000000 0000000 00000022016 12635670016 0016342 0 ustar 00root root 0000000 0000000 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* libdatrie - Double-Array Trie Library
* Copyright (C) 2006 Theppitak Karoonboonyanan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* alpha-map.c - map between character codes and trie alphabet
* Created: 2006-08-19
* Author: Theppitak Karoonboonyanan
*/
#include
#include
#include
#include
#include
#include "alpha-map.h"
#include "alpha-map-private.h"
/**
* @brief Alphabet string length
*
* @param str : the array of null-terminated AlphaChar string to measure
*
* @return the total characters in @a str.
*/
int
alpha_char_strlen (const AlphaChar *str)
{
    const AlphaChar *end = str;

    /* advance to the terminating 0 element */
    while (*end != 0)
        ++end;

    return end - str;
}
/**
* @brief Compare alphabet strings
*
* @param str1, str2 : the arrays of null-terminated AlphaChar strings
* to compare
*
* @return negative if @a str1 < @a str2;
* 0 if @a str1 == @a str2;
* positive if @a str1 > @a str2
*
* Available since: 0.2.7
*/
int
alpha_char_strcmp (const AlphaChar *str1, const AlphaChar *str2)
{
    /* skip the common prefix; stops at the first difference or at the
     * terminator of str1 */
    for (; *str1 != 0 && *str1 == *str2; ++str1, ++str2)
        ;

    if (*str1 == *str2)
        return 0;
    return (*str1 < *str2) ? -1 : 1;
}
/*------------------------------*
* PRIVATE DATA DEFINITIONS *
*------------------------------*/
/* One inclusive character range [begin, end]; ranges are chained in a
 * singly linked list that alpha_map_add_range() keeps ordered by `begin`
 * and merges when ranges overlap or touch. */
typedef struct _AlphaRange {
struct _AlphaRange *next;
AlphaChar begin;
AlphaChar end;
} AlphaRange;
/* The alphabet map itself: just the head of the range list (NULL when the
 * map is empty). */
struct _AlphaMap {
AlphaRange *first_range;
};
/*-----------------------------*
* METHODS IMPLEMENTATIONS *
*-----------------------------*/
#define ALPHAMAP_SIGNATURE 0xD9FCD9FC
/* AlphaMap Header:
* - INT32: signature
* - INT32: total ranges
*
* Ranges:
* - INT32: range begin
* - INT32: range end
*/
/**
* @brief Create new alphabet map
*
* @return a pointer to the newly created alphabet map, NULL on failure
*
* Create a new empty alphabet map. The map contents can then be added with
* alpha_map_add_range().
*
* The created object must be freed with alpha_map_free().
*/
AlphaMap *
alpha_map_new (void)
{
AlphaMap *alpha_map;
alpha_map = (AlphaMap *) malloc (sizeof (AlphaMap));
if (!alpha_map)
return NULL;
alpha_map->first_range = NULL;
return alpha_map;
}
/**
* @brief Create a clone of alphabet map
*
* @param a_map : the source alphabet map to clone
*
* @return a pointer to the alphabet map clone, NULL on failure
*
* The created object must be freed with alpha_map_free().
*/
AlphaMap *
alpha_map_clone (const AlphaMap *a_map)
{
    const AlphaRange *src;
    AlphaMap *copy = alpha_map_new ();

    if (copy == NULL)
        return NULL;

    /* replay every source range into the copy; abandon the copy on the
     * first failure */
    src = a_map->first_range;
    while (src != NULL) {
        if (alpha_map_add_range (copy, src->begin, src->end) != 0) {
            alpha_map_free (copy);
            return NULL;
        }
        src = src->next;
    }

    return copy;
}
/**
* @brief Free an alphabet map object
*
* @param alpha_map : the alphabet map object to free
*
* Destruct the @a alpha_map and free its allocated memory.
*/
void
alpha_map_free (AlphaMap *alpha_map)
{
    AlphaRange *node, *following;

    /* release every range node, remembering the successor before freeing */
    for (node = alpha_map->first_range; node != NULL; node = following) {
        following = node->next;
        free (node);
    }

    free (alpha_map);
}
/**
* @brief Add a range to alphabet map
*
* @param alpha_map : the alphabet map object
* @param begin : the first character of the range
* @param end : the last character of the range
*
* @return 0 on success, non-zero on failure
*
* Add a range of character codes from @a begin to @a end to the
* alphabet set.
*/
int
alpha_map_add_range (AlphaMap *alpha_map, AlphaChar begin, AlphaChar end)
{
/* Overview: walk the sorted range list with a trailing pointer 'q' and a
 * current pointer 'r'. 'begin_node' / 'end_node' record the existing ranges
 * (if any) that end up containing 'begin' and 'end' after extension.
 * Existing ranges fully swallowed by ['begin', 'end'] are freed, touching
 * or overlapping ranges are merged, and a brand new node is only allocated
 * when the new range overlaps nothing. Returns -1 for begin > end or on
 * allocation failure, 0 otherwise. */
AlphaRange *q, *r, *begin_node, *end_node;
if (begin > end)
return -1;
begin_node = end_node = 0;
/* Skip first ranges till 'begin' is covered */
for (q = 0, r = alpha_map->first_range;
r && r->begin <= begin;
q = r, r = r->next)
{
if (begin <= r->end) {
/* 'r' covers 'begin' -> take 'r' as beginning point */
begin_node = r;
break;
}
if (r->end + 1 == begin) {
/* 'begin' is next to 'r'-end
* -> extend 'r'-end to cover 'begin'
*/
r->end = begin;
begin_node = r;
break;
}
}
/* Here 'r' is either the node chosen as begin_node, or the first range
 * starting after 'begin' (or NULL); 'q' is its predecessor (or NULL). */
if (!begin_node && r && r->begin <= end + 1) {
/* ['begin', 'end'] overlaps into 'r'-begin
* or 'r' is next to 'end' if r->begin == end + 1
* -> extend 'r'-begin to include the range
*/
r->begin = begin;
begin_node = r;
}
/* Run upto the first range that exceeds 'end' */
while (r && r->begin <= end + 1) {
if (end <= r->end) {
/* 'r' covers 'end' -> take 'r' as ending point */
end_node = r;
} else if (r != begin_node) {
/* ['begin', 'end'] covers the whole 'r' -> remove 'r' */
if (q) {
q->next = r->next;
free (r);
r = q->next;
} else {
alpha_map->first_range = r->next;
free (r);
r = alpha_map->first_range;
}
/* 'q' must stay put after an unlink, so skip the advance below */
continue;
}
q = r;
r = r->next;
}
if (!end_node && q && begin <= q->end) {
/* ['begin', 'end'] overlaps 'q' at the end
* -> extend 'q'-end to include the range
*/
q->end = end;
end_node = q;
}
if (begin_node && end_node) {
if (begin_node != end_node) {
/* Merge begin_node and end_node ranges together */
assert (begin_node->next == end_node);
begin_node->end = end_node->end;
begin_node->next = end_node->next;
free (end_node);
}
} else if (!begin_node && !end_node) {
/* ['begin', 'end'] overlaps with none of the ranges
* -> insert a new range
*/
AlphaRange *range = (AlphaRange *) malloc (sizeof (AlphaRange));
if (!range)
return -1;
range->begin = begin;
range->end = end;
/* insert it between 'q' and 'r' */
if (q) {
q->next = range;
} else {
alpha_map->first_range = range;
}
range->next = r;
}
return 0;
}
/* Translate an alphabet character into its packed trie code.
 * Code 0 is reserved for the terminator; codes 1.. are handed out range by
 * range in list order. Returns TRIE_INDEX_MAX when no range contains ac. */
TrieIndex
alpha_map_char_to_trie (const AlphaMap *alpha_map, AlphaChar ac)
{
    const AlphaRange *cur;
    TrieIndex first_code = 1;   /* trie code of cur->begin */

    if (ac == 0)
        return 0;

    cur = alpha_map->first_range;
    while (cur != NULL) {
        if (ac >= cur->begin && ac <= cur->end)
            return first_code + (ac - cur->begin);

        /* skip past all codes owned by this range */
        first_code += cur->end - cur->begin + 1;
        cur = cur->next;
    }

    return TRIE_INDEX_MAX;
}
/* Translate a packed trie code back into its alphabet character.
 * Code 0 maps to 0; a code beyond every range yields ALPHA_CHAR_ERROR. */
AlphaChar
alpha_map_trie_to_char (const AlphaMap *alpha_map, TrieChar tc)
{
    const AlphaRange *cur;
    TrieChar first_code = 1;    /* trie code of cur->begin */

    if (tc == 0)
        return 0;

    cur = alpha_map->first_range;
    while (cur != NULL) {
        if (tc <= first_code + (cur->end - cur->begin))
            return cur->begin + (tc - first_code);

        /* skip past all codes owned by this range */
        first_code += cur->end - cur->begin + 1;
        cur = cur->next;
    }

    return ALPHA_CHAR_ERROR;
}
TrieChar *
alpha_map_char_to_trie_str (const AlphaMap *alpha_map, const AlphaChar *str)
{
TrieChar *trie_str, *p;
trie_str = (TrieChar *) malloc (alpha_char_strlen (str) + 1);
if (!trie_str)
return NULL;
for (p = trie_str; *str; p++, str++) {
TrieIndex tc = alpha_map_char_to_trie (alpha_map, *str);
if (TRIE_INDEX_MAX == tc)
goto error_str_allocated;
*p = (TrieChar) tc;
}
*p = 0;
return trie_str;
error_str_allocated:
free (trie_str);
return NULL;
}
AlphaChar *
alpha_map_trie_to_char_str (const AlphaMap *alpha_map, const TrieChar *str)
{
AlphaChar *alpha_str, *p;
alpha_str = (AlphaChar *) malloc ((strlen ((const char *)str) + 1)
* sizeof (AlphaChar));
if (!alpha_str)
return NULL;
for (p = alpha_str; *str; p++, str++) {
*p = (AlphaChar) alpha_map_trie_to_char (alpha_map, *str);
}
*p = 0;
return alpha_str;
}
/*
vi:ts=4:ai:expandtab
*/
axe-0.3.1/src/datrie/alpha-map.h 0000664 0000000 0000000 00000005534 12635670016 0016355 0 ustar 00root root 0000000 0000000 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* libdatrie - Double-Array Trie Library
* Copyright (C) 2006 Theppitak Karoonboonyanan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* alpha-map.h - map between character codes and trie alphabet
* Created: 2006-08-19
* Author: Theppitak Karoonboonyanan
*/
#ifndef __ALPHA_MAP_H
#define __ALPHA_MAP_H
#include
#include "typedefs.h"
#include "triedefs.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
* @file alpha-map.h
* @brief AlphaMap data type and functions
*
* AlphaMap is a mapping between AlphaChar and TrieChar. AlphaChar is the
* alphabet character used in words of a target language, while TrieChar
* is a small integer with packed range of values and is actually used in
* trie state transition calculations.
*
* Since double-array trie relies on sparse state transition table,
* a small set of input characters can make the table small, i.e. with
* small number of columns. But in real life, alphabet characters can be
* of non-continuous range of values. The unused slots between them can
* waste the space in the table, and can increase the chance of unused
* array cells.
*
* AlphaMap is thus defined for mapping between non-continuous ranges of
* values of AlphaChar and packed and continuous range of Triechar.
*
* In this implementation, TrieChar is defined as a single-byte integer,
* which means the largest AlphaChar set that is supported is of 255
* values, as the special value of 0 is reserved for null-termination code.
*/
/**
* @brief AlphaMap data type
*/
typedef struct _AlphaMap AlphaMap;
/* Create an empty map; NULL on allocation failure. Free with
 * alpha_map_free(). */
AlphaMap * alpha_map_new (void);
/* Deep-copy a map by re-adding each of its ranges; NULL on failure. */
AlphaMap * alpha_map_clone (const AlphaMap *a_map);
/* Free the map and all of its ranges. */
void alpha_map_free (AlphaMap *alpha_map);
/* Add the inclusive range [begin, end]; 0 on success, non-zero on failure
 * (including begin > end). */
int alpha_map_add_range (AlphaMap *alpha_map,
AlphaChar begin,
AlphaChar end);
/* Number of AlphaChar elements before the terminating 0. */
int alpha_char_strlen (const AlphaChar *str);
/* Element-wise comparison: -1, 0 or 1. */
int alpha_char_strcmp (const AlphaChar *str1, const AlphaChar *str2);
#ifdef __cplusplus
}
#endif
#endif /* __ALPHA_MAP_H */
/*
vi:ts=4:ai:expandtab
*/
axe-0.3.1/src/datrie/darray.c 0000664 0000000 0000000 00000046043 12635670016 0015772 0 ustar 00root root 0000000 0000000 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* libdatrie - Double-Array Trie Library
* Copyright (C) 2006 Theppitak Karoonboonyanan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* darray.c - Double-array trie structure
* Created: 2006-08-13
* Author: Theppitak Karoonboonyanan
*/
#include
#include
#ifndef _MSC_VER /* for SIZE_MAX */
# include
#endif
#include
#include "trie-private.h"
#include "darray.h"
/*----------------------------------*
* INTERNAL TYPES DECLARATIONS *
*----------------------------------*/
/* A small set of TrieChar symbols. symbols_add() keeps the array sorted and
 * free of duplicates; 256 slots are enough for every distinct value of a
 * single-byte TrieChar. */
struct _Symbols {
short num_symbols; /* number of used entries in symbols[] */
TrieChar symbols[256];
};
/* Append without any ordering, duplicate or capacity check; the caller must
 * supply symbols in an order and quantity that keep the set valid. */
#define symbols_add_fast(s,c) ((s)->symbols[(s)->num_symbols++] = c)
/*-----------------------------------*
* PRIVATE METHODS DECLARATIONS *
*-----------------------------------*/
/* Index of the cell that heads the circular free list (cell 1, see the
 * "DA Header" layout comment below). */
#define da_get_free_list(d) (1)
/* True when cell s exists (extending the pool if needed) and is free. */
static bool da_check_free_cell (DArray *d,
TrieIndex s);
/* True when at least one cell has s as its CHECK parent. */
static bool da_has_children (const DArray *d,
TrieIndex s);
/* Find a BASE value under which every symbol lands on a free cell. */
static TrieIndex da_find_free_base (DArray *d,
const Symbols *symbols);
/* True when base + sym is a free cell for every symbol in the set. */
static bool da_fit_symbols (DArray *d,
TrieIndex base,
const Symbols *symbols);
/* Move all children of s so that they hang off new_base. */
static void da_relocate_base (DArray *d,
TrieIndex s,
TrieIndex new_base);
/* Grow the cell pool so that to_index is a valid cell. */
static bool da_extend_pool (DArray *d,
TrieIndex to_index);
/* Unlink a cell from the free list. */
static void da_alloc_cell (DArray *d,
TrieIndex cell);
/* Link a cell back into the free list, keeping it ordered by index. */
static void da_free_cell (DArray *d,
TrieIndex cell);
/* ==================== BEGIN IMPLEMENTATION PART ==================== */
/*------------------------------------*
* INTERNAL TYPES IMPLEMENTATIONS *
*------------------------------------*/
/* Allocate an empty symbol set; returns NULL when out of memory. */
Symbols *
symbols_new (void)
{
    Symbols *set = (Symbols *) malloc (sizeof (Symbols));

    if (set != NULL)
        set->num_symbols = 0;

    return set;
}
/* Release a symbol set obtained from symbols_new(); the set owns no other
 * allocations, so a single free() suffices. */
void
symbols_free (Symbols *syms)
{
    void *block = syms;

    free (block);
}
/* Insert c into the sorted symbol set, doing nothing when it is already
 * present. Uses a binary search to locate the slot, then shifts the tail up
 * by one to make room. */
void
symbols_add (Symbols *syms, TrieChar c)
{
    short lo = 0;
    short hi = syms->num_symbols;

    while (lo < hi) {
        short mid = (lo + hi)/2;
        TrieChar probe = syms->symbols[mid];

        if (probe == c)
            return;             /* already a member */
        if (probe < c)
            lo = mid + 1;
        else
            hi = mid;
    }

    /* lo is the insertion point; open a gap unless appending at the end */
    if (lo < syms->num_symbols) {
        memmove (&syms->symbols[lo + 1], &syms->symbols[lo],
                 syms->num_symbols - lo);
    }
    syms->symbols[lo] = c;
    syms->num_symbols++;
}
/* Number of symbols currently stored in the set. */
int
symbols_num (const Symbols *syms)
{
    int count = syms->num_symbols;

    return count;
}
/* Symbol stored at position index; the index is not range-checked. */
TrieChar
symbols_get (const Symbols *syms, int index)
{
    const TrieChar *slot = syms->symbols + index;

    return *slot;
}
/*------------------------------*
* PRIVATE DATA DEFINITIONS *
*------------------------------*/
/* One double-array cell. For a cell on the free list both fields are
 * negative: check holds minus the index of the next free cell and base
 * holds minus the index of the previous one (see da_extend_pool()). */
typedef struct {
TrieIndex base;
TrieIndex check;
} DACell;
/* The double-array itself: a growable array of cells. */
struct _DArray {
TrieIndex num_cells; /* number of entries in cells[] */
DACell *cells;
};
/*-----------------------------*
* METHODS IMPLEMENTATIONS *
*-----------------------------*/
#define DA_SIGNATURE 0xDAFCDAFC
/* DA Header:
* - Cell 0: SIGNATURE, number of cells
* - Cell 1: free circular-list pointers
* - Cell 2: root node
* - Cell 3: DA pool begin
*/
#define DA_POOL_BEGIN 3
/**
* @brief Create a new double-array object
*
* Create a new empty double-array object.
*/
DArray *
da_new (void)
{
    DArray *da = (DArray *) malloc (sizeof (DArray));

    if (da == NULL)
        return NULL;

    /* start with just the header cells: signature, free list, root */
    da->num_cells = DA_POOL_BEGIN;
    da->cells = (DACell *) malloc (da->num_cells * sizeof (DACell));
    if (da->cells == NULL) {
        free (da);
        return NULL;
    }

    /* cell 0: signature and cell count */
    da->cells[0].base = DA_SIGNATURE;
    da->cells[0].check = da->num_cells;
    /* cell 1: empty circular free list */
    da->cells[1].base = -1;
    da->cells[1].check = -1;
    /* cell 2: root node */
    da->cells[2].base = DA_POOL_BEGIN;
    da->cells[2].check = 0;

    return da;
}
/**
* @brief Free double-array data
*
* @param d : the double-array data
*
* Free the given double-array data.
*/
void
da_free (DArray *d)
{
    DACell *pool = d->cells;

    /* release the cell pool first, then the descriptor itself */
    free (pool);
    free (d);
}
/**
* @brief Get root state
*
* @param d : the double-array data
*
* @return root state of the @a index set, or TRIE_INDEX_ERROR on failure
*
* Get root state for stepwise walking.
*/
TrieIndex
da_get_root (const DArray *d)
{
    /* the root always lives in cell 2; this could become a computed value
     * for a multi-index trie */
    const TrieIndex root_cell = 2;

    (void) d;
    return root_cell;
}
/**
* @brief Get BASE cell
*
* @param d : the double-array data
* @param s : the double-array state to get data
*
* @return the BASE cell value for the given state
*
* Get BASE cell value for the given state.
*/
TrieIndex
da_get_base (const DArray *d, TrieIndex s)
{
    /* TrieIndex is signed, so reject negative states as well as states
     * beyond the pool; either would index outside d->cells. */
    if (s < 0 || s >= d->num_cells)
        return TRIE_INDEX_ERROR;

    return d->cells[s].base;
}
/**
* @brief Get CHECK cell
*
* @param d : the double-array data
* @param s : the double-array state to get data
*
* @return the CHECK cell value for the given state
*
* Get CHECK cell value for the given state.
*/
TrieIndex
da_get_check (const DArray *d, TrieIndex s)
{
    /* TrieIndex is signed, so reject negative states as well as states
     * beyond the pool; either would index outside d->cells. */
    if (s < 0 || s >= d->num_cells)
        return TRIE_INDEX_ERROR;

    return d->cells[s].check;
}
/**
* @brief Set BASE cell
*
* @param d : the double-array data
* @param s : the double-array state to get data
* @param val : the value to set
*
* Set BASE cell for the given state to the given value.
*/
void
da_set_base (DArray *d, TrieIndex s, TrieIndex val)
{
    /* Silently ignore out-of-range states. The lower bound matters because
     * TrieIndex is signed: a negative s would write before d->cells. */
    if (0 <= s && s < d->num_cells) {
        d->cells[s].base = val;
    }
}
/**
* @brief Set CHECK cell
*
* @param d : the double-array data
* @param s : the double-array state to get data
* @param val : the value to set
*
* Set CHECK cell for the given state to the given value.
*/
void
da_set_check (DArray *d, TrieIndex s, TrieIndex val)
{
    /* Silently ignore out-of-range states. The lower bound matters because
     * TrieIndex is signed: a negative s would write before d->cells. */
    if (0 <= s && s < d->num_cells) {
        d->cells[s].check = val;
    }
}
/**
* @brief Walk in double-array structure
*
* @param d : the double-array structure
* @param s : current state
* @param c : the input character
*
* @return boolean indicating success
*
* Walk the double-array trie from state @a *s, using input character @a c.
* If there exists an edge from @a *s with arc labeled @a c, this function
* returns true and @a *s is updated to the new state. Otherwise, it returns
* false and @a *s is left unchanged.
*/
bool
da_walk (const DArray *d, TrieIndex *s, TrieChar c)
{
    const TrieIndex from = *s;
    const TrieIndex target = da_get_base (d, from) + c;

    /* the arc exists only if the target cell names 'from' as its parent */
    if (da_get_check (d, target) != from)
        return false;

    *s = target;
    return true;
}
/**
* @brief Insert a branch from trie node
*
* @param d : the double-array structure
* @param s : the state to add branch to
* @param c : the character for the branch label
*
* @return the index of the new node
*
* Insert a new arc labelled with character @a c from the trie node
* represented by index @a s in double-array structure @a d.
* Note that it assumes that no such arc exists before inserting.
*/
TrieIndex
da_insert_branch (DArray *d, TrieIndex s, TrieChar c)
{
    TrieIndex   base, next, new_base;
    Symbols    *symbols;

    base = da_get_base (d, s);

    if (base > 0) {
        bool must_relocate = true;

        next = TRIE_INDEX_ERROR;

        /* Only form base + c once it is known not to overflow TrieIndex;
         * an overflowing sum always forces a relocation. */
        if (base <= TRIE_INDEX_MAX - c) {
            next = base + c;

            /* if already there, do not actually insert */
            if (da_get_check (d, next) == s)
                return next;

            /* a non-free target cell also forces a relocation */
            must_relocate = !da_check_free_cell (d, next);
        }

        if (must_relocate) {
            /* relocate BASE[s] to a place where the existing children
             * plus the new symbol all fit */
            symbols = da_output_symbols (d, s);
            if (!symbols)
                return TRIE_INDEX_ERROR;    /* out of memory */
            symbols_add (symbols, c);
            new_base = da_find_free_base (d, symbols);
            symbols_free (symbols);

            if (TRIE_INDEX_ERROR == new_base)
                return TRIE_INDEX_ERROR;

            da_relocate_base (d, s, new_base);
            next = new_base + c;
        }
    } else {
        /* s has no children yet: pick a base for the single new symbol */
        symbols = symbols_new ();
        if (!symbols)
            return TRIE_INDEX_ERROR;        /* out of memory */
        symbols_add (symbols, c);
        new_base = da_find_free_base (d, symbols);
        symbols_free (symbols);

        if (TRIE_INDEX_ERROR == new_base)
            return TRIE_INDEX_ERROR;

        da_set_base (d, s, new_base);
        next = new_base + c;
    }

    /* claim the target cell and hang it under s */
    da_alloc_cell (d, next);
    da_set_check (d, next, s);

    return next;
}
/* True when cell s exists (the pool is extended on demand) and is on the
 * free list, i.e. its CHECK value is negative. */
static bool
da_check_free_cell (DArray *d,
                    TrieIndex s)
{
    if (!da_extend_pool (d, s))
        return false;

    return da_get_check (d, s) < 0;
}
/* True when at least one cell reachable from BASE[s] names s as its
 * parent. States whose BASE is negative or TRIE_INDEX_ERROR have no
 * children by definition. */
static bool
da_has_children (const DArray *d,
                 TrieIndex s)
{
    TrieIndex offset, last;
    const TrieIndex base = da_get_base (d, s);

    if (base < 0 || base == TRIE_INDEX_ERROR)
        return false;

    last = MIN_VAL (TRIE_CHAR_MAX, d->num_cells - base);
    offset = 0;
    while (offset <= last) {
        if (da_get_check (d, base + offset) == s)
            return true;
        offset++;
    }

    return false;
}
/* Collect the labels of all arcs leaving state s.
 *
 * Returns a newly allocated symbol set (release with symbols_free()), filled
 * in ascending symbol order, or NULL when the set cannot be allocated. */
Symbols *
da_output_symbols (const DArray *d,
                   TrieIndex s)
{
    Symbols    *syms;
    TrieIndex   base;
    TrieIndex   c, max_c;

    syms = symbols_new ();
    if (!syms)
        return NULL;            /* out of memory */

    base = da_get_base (d, s);
    max_c = MIN_VAL (TRIE_CHAR_MAX, d->num_cells - base);
    for (c = 0; c <= max_c; c++) {
        /* c only increases, so appending keeps the set sorted and unique */
        if (da_get_check (d, base + c) == s)
            symbols_add_fast (syms, (TrieChar) c);
    }

    return syms;
}
/* Find a BASE value such that base + sym is a free cell for every symbol in
 * 'symbols', extending the pool when the free list runs out. Returns
 * TRIE_INDEX_ERROR when the pool cannot be extended. The free list is
 * walked through the CHECK fields, which hold minus the index of the next
 * free cell. */
static TrieIndex
da_find_free_base (DArray *d,
const Symbols *symbols)
{
TrieChar first_sym;
TrieIndex s;
/* find first free cell that is beyond the first symbol */
first_sym = symbols_get (symbols, 0);
s = -da_get_check (d, da_get_free_list (d));
while (s != da_get_free_list (d)
&& s < (TrieIndex) first_sym + DA_POOL_BEGIN)
{
s = -da_get_check (d, s);
}
/* wrapped back to the list head: no suitable free cell exists yet, so
 * scan upwards, growing the pool, until a free cell turns up */
if (s == da_get_free_list (d)) {
for (s = first_sym + DA_POOL_BEGIN; ; ++s) {
if (!da_extend_pool (d, s))
return TRIE_INDEX_ERROR;
if (da_get_check (d, s) < 0)
break;
}
}
/* search for next free cell that fits the symbols set */
/* s is the candidate cell for first_sym, hence the candidate base is
 * s - first_sym */
while (!da_fit_symbols (d, s - first_sym, symbols)) {
/* extend pool before getting exhausted */
if (-da_get_check (d, s) == da_get_free_list (d)) {
if (!da_extend_pool (d, d->num_cells))
return TRIE_INDEX_ERROR;
}
s = -da_get_check (d, s);
}
return s - first_sym;
}
/* True when, for every symbol in the set, base + symbol neither overflows
 * TrieIndex nor lands on an occupied cell. */
static bool
da_fit_symbols (DArray *d,
                TrieIndex base,
                const Symbols *symbols)
{
    int idx = 0;

    while (idx < symbols_num (symbols)) {
        const TrieChar sym = symbols_get (symbols, idx);

        /* overflow of base + sym counts as "does not fit" */
        if (base > TRIE_INDEX_MAX - sym)
            return false;
        if (!da_check_free_cell (d, base + sym))
            return false;

        idx++;
    }

    return true;
}
/* Move every child of state s from old_base + sym to new_base + sym, fix up
 * the CHECK pointers of the grandchildren so that they follow the moved
 * child, release the old child cells, and finally store new_base as
 * BASE[s]. The caller must have verified that all target cells are free. */
static void
da_relocate_base (DArray *d,
TrieIndex s,
TrieIndex new_base)
{
TrieIndex old_base;
Symbols *symbols;
int i;
old_base = da_get_base (d, s);
/* snapshot of the arc labels currently leaving s */
symbols = da_output_symbols (d, s);
for (i = 0; i < symbols_num (symbols); i++) {
TrieIndex old_next, new_next, old_next_base;
old_next = old_base + symbols_get (symbols, i);
new_next = new_base + symbols_get (symbols, i);
old_next_base = da_get_base (d, old_next);
/* allocate new next node and copy BASE value */
da_alloc_cell (d, new_next);
da_set_check (d, new_next, s);
da_set_base (d, new_next, old_next_base);
/* old_next node is now moved to new_next
* so, all cells belonging to old_next
* must be given to new_next
*/
/* preventing the case of TAIL pointer */
if (old_next_base > 0) {
TrieIndex c, max_c;
max_c = MIN_VAL (TRIE_CHAR_MAX, d->num_cells - old_next_base);
for (c = 0; c <= max_c; c++) {
if (da_get_check (d, old_next_base + c) == old_next)
da_set_check (d, old_next_base + c, new_next);
}
}
/* free old_next node */
da_free_cell (d, old_next);
}
symbols_free (symbols);
/* finally, make BASE[s] point to new_base */
da_set_base (d, s, new_base);
}
/* Make sure cell 'to_index' exists, growing the cell array if necessary and
 * threading the newly created cells onto the circular free list.
 *
 * Returns true when the cell is available; false when to_index is out of
 * the representable range or the array cannot be grown. On failure the
 * double-array is left unchanged. */
static bool
da_extend_pool (DArray *d,
                TrieIndex to_index)
{
    DACell     *new_cells;
    TrieIndex   new_begin;
    TrieIndex   i;
    TrieIndex   free_tail;

    if (to_index <= 0 || TRIE_INDEX_MAX <= to_index)
        return false;

    if (to_index < d->num_cells)
        return true;

    /* Grow through a temporary so that a failed realloc() neither leaks the
     * old block nor leaves d->cells NULL. */
    new_cells = (DACell *) realloc (d->cells,
                                    (to_index + 1) * sizeof (DACell));
    if (!new_cells)
        return false;
    d->cells = new_cells;

    new_begin = d->num_cells;
    d->num_cells = to_index + 1;

    /* initialize new free list */
    for (i = new_begin; i < to_index; i++) {
        da_set_check (d, i, -(i + 1));
        da_set_base (d, i + 1, -i);
    }

    /* merge the new circular list to the old */
    free_tail = -da_get_base (d, da_get_free_list (d));
    da_set_check (d, free_tail, -new_begin);
    da_set_base (d, new_begin, -free_tail);
    da_set_check (d, to_index, -da_get_free_list (d));
    da_set_base (d, da_get_free_list (d), -to_index);

    /* update header cell */
    d->cells[0].check = d->num_cells;

    return true;
}
/**
* @brief Prune the single branch
*
* @param d : the double-array structure
* @param s : the dangling state to prune off
*
* Prune off a non-separate path up from the final state @a s.
* If @a s still has some children states, it does nothing. Otherwise,
* it deletes the node and all its parents which become non-separate.
*/
void
da_prune (DArray *d, TrieIndex s)
{
    /* pruning all the way is pruning up to the root */
    const TrieIndex root = da_get_root (d);

    da_prune_upto (d, root, s);
}
/**
* @brief Prune the single branch up to given parent
*
* @param d : the double-array structure
* @param p : the parent up to which to be pruned
* @param s : the dangling state to prune off
*
* Prune off a non-separate path up from the final state @a s to the
* given parent @a p. The pruning stops when either the parent @a p
* is met, or a first non-separate node is found.
*/
void
da_prune_upto (DArray *d, TrieIndex p, TrieIndex s)
{
    /* climb towards p, freeing each childless state on the way */
    while (s != p) {
        TrieIndex above;

        if (da_has_children (d, s))
            break;

        above = da_get_check (d, s);
        da_free_cell (d, s);
        s = above;
    }
}
/*
 * Take @a cell out of the free list by linking its predecessor and
 * successor to each other. Free-list links are stored negated: BASE holds
 * -(previous free cell) and CHECK holds -(next free cell).
 */
static void
da_alloc_cell (DArray     *d,
               TrieIndex   cell)
{
    const TrieIndex before = -da_get_base (d, cell);
    const TrieIndex after  = -da_get_check (d, cell);

    /* unlink: before <-> after */
    da_set_check (d, before, -after);
    da_set_base (d, after, -before);
}
/*
 * Return @a cell to the free list. The list is walked from the cell after
 * the list head until it wraps back to the head or reaches a cell whose
 * index is not below @a cell; @a cell is then linked in just before that
 * position.
 */
static void
da_free_cell (DArray     *d,
              TrieIndex   cell)
{
    TrieIndex   succ;
    TrieIndex   pred;

    /* locate the first free cell that should follow `cell` */
    for (succ = -da_get_check (d, da_get_free_list (d));
         succ != da_get_free_list (d) && succ < cell;
         succ = -da_get_check (d, succ))
    {
        /* nothing: the loop header does the walking */
    }

    pred = -da_get_base (d, succ);

    /* splice: pred <-> cell <-> succ */
    da_set_check (d, cell, -succ);
    da_set_base (d, cell, -pred);
    da_set_check (d, pred, -cell);
    da_set_base (d, succ, -cell);
}
/**
 * @brief Find first separate node in a sub-trie
 *
 * @param d       : the double-array structure
 * @param root    : the sub-trie root to search from
 * @param keybuff : the TrieString buffer for incrementally calculating key
 *
 * @return index to the first separate node; TRIE_INDEX_ERROR on any failure
 *
 * Find the first separate node under a sub-trie rooted at @a root, by
 * repeatedly descending into the child with the smallest input character
 * until a node with negative BASE is reached.
 *
 * On return, @a keybuff is appended with the key characters which walk from
 * @a root to the separate node. This is for incrementally calculating the
 * transition key, which is more efficient than later totally reconstructing
 * key from the given separate node.
 *
 * Available since: 0.2.6
 */
TrieIndex
da_first_separate (DArray *d, TrieIndex root, TrieString *keybuff)
{
    TrieIndex base;
    TrieIndex c, max_c;

    while ((base = da_get_base (d, root)) >= 0) {
        max_c = MIN_VAL (TRIE_CHAR_MAX, d->num_cells - base);
        for (c = 0; c <= max_c; c++) {
            if (da_get_check (d, base + c) == root)
                break;
        }

        /* The scan covers 0..max_c inclusive, so "no child found" leaves
         * c at max_c + 1. Comparing with == would both miss that case and
         * reject a genuine child located at max_c.
         */
        if (c > max_c)
            return TRIE_INDEX_ERROR;

        trie_string_append_char (keybuff, c);
        root = base + c;
    }

    return root;
}
/**
 * @brief Find next separate node in a sub-trie
 *
 * @param d       : the double-array structure
 * @param root    : the sub-trie root to search from
 * @param sep     : the current separate node
 * @param keybuff : the TrieString buffer for incrementally calculating key
 *
 * @return index to the next separate node; TRIE_INDEX_ERROR if no more
 *         separate node is found
 *
 * Climb from @a sep towards @a root. At each level the last key character
 * is dropped from @a keybuff and the parent's remaining input characters
 * are scanned for a following sibling. When one is found, its character is
 * appended to @a keybuff and the search descends through
 * da_first_separate().
 *
 * @a keybuff is updated incrementally, so it is assumed to already hold the
 * key of @a sep, as left by a previous da_first_separate() or
 * da_next_separate() call.
 *
 * Available since: 0.2.6
 */
TrieIndex
da_next_separate (DArray *d, TrieIndex root, TrieIndex sep, TrieString *keybuff)
{
    TrieIndex   cur;

    for (cur = sep; cur != root; ) {
        const TrieIndex up      = da_get_check (d, cur);
        const TrieIndex up_base = da_get_base (d, up);
        TrieIndex       sym;
        TrieIndex       last_sym;

        /* leaving this level: drop its character from the key */
        trie_string_cut_last (keybuff);

        /* look for the next sibling after `cur` */
        last_sym = MIN_VAL (TRIE_CHAR_MAX, d->num_cells - up_base);
        for (sym = cur - up_base + 1; sym <= last_sym; sym++) {
            const TrieIndex sibling = up_base + sym;

            if (da_get_check (d, sibling) == up) {
                trie_string_append_char (keybuff, sym);
                return da_first_separate (d, sibling, keybuff);
            }
        }

        /* no more siblings here; continue one level up */
        cur = up;
    }

    return TRIE_INDEX_ERROR;
}
/*
vi:ts=4:ai:expandtab
*/
axe-0.3.1/src/datrie/darray.h 0000664 0000000 0000000 00000005754 12635670016 0016003 0 ustar 00root root 0000000 0000000 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* libdatrie - Double-Array Trie Library
* Copyright (C) 2006 Theppitak Karoonboonyanan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* darray.h - Double-array trie structure
* Created: 2006-08-11
* Author: Theppitak Karoonboonyanan
*/
#ifndef __DARRAY_H
#define __DARRAY_H
#include "triedefs.h"
#include "trie-string.h"
/**
* @file darray.h
* @brief Double-array trie structure
*/
/**
* @brief Symbol set structure type
*/
typedef struct _Symbols Symbols;
Symbols * symbols_new (void);
void symbols_free (Symbols *syms);
void symbols_add (Symbols *syms, TrieChar c);
int symbols_num (const Symbols *syms);
TrieChar symbols_get (const Symbols *syms, int index);
/**
* @brief Double-array structure type
*/
typedef struct _DArray DArray;
DArray * da_new (void);
void da_free (DArray *d);
TrieIndex da_get_root (const DArray *d);
TrieIndex da_get_base (const DArray *d, TrieIndex s);
TrieIndex da_get_check (const DArray *d, TrieIndex s);
void da_set_base (DArray *d, TrieIndex s, TrieIndex val);
void da_set_check (DArray *d, TrieIndex s, TrieIndex val);
bool da_walk (const DArray *d, TrieIndex *s, TrieChar c);
Symbols * da_output_symbols (const DArray *d, TrieIndex s);
/**
* @brief Test walkability in double-array structure
*
* @param d : the double-array structure
* @param s : current state
* @param c : the input character
*
* @return boolean indicating walkability
*
* Test if there is a transition from state @a s with input character @a c.
* The test holds when the CHECK cell at BASE[s] + c equals @a s.
*
* This is implemented as a macro in which @a d and @a s each appear twice,
* so arguments with side effects must be avoided.
*/
/*
bool da_is_walkable (DArray *d, TrieIndex s, TrieChar c);
*/
#define da_is_walkable(d,s,c) \
(da_get_check ((d), da_get_base ((d), (s)) + (c)) == (s))
TrieIndex da_insert_branch (DArray *d, TrieIndex s, TrieChar c);
void da_prune (DArray *d, TrieIndex s);
void da_prune_upto (DArray *d, TrieIndex p, TrieIndex s);
TrieIndex da_first_separate (DArray *d, TrieIndex root, TrieString *keybuff);
TrieIndex da_next_separate (DArray *d,
TrieIndex root,
TrieIndex sep,
TrieString *keybuff);
#endif /* __DARRAY_H */
/*
vi:ts=4:ai:expandtab
*/
axe-0.3.1/src/datrie/dstring-private.h 0000664 0000000 0000000 00000002447 12635670016 0017637 0 ustar 00root root 0000000 0000000 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* libdatrie - Double-Array Trie Library
* Copyright (C) 2006 Theppitak Karoonboonyanan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* dstring-private.h - Dynamic string type
* Created: 2012-08-02
* Author: Theppitak Karoonboonyanan
*/
#ifndef __DSTRING_PRIVATE_H
#define __DSTRING_PRIVATE_H
#include "typedefs.h"
/* Dynamic string of fixed-size elements backed by one heap buffer. */
struct _DString {
/* size in bytes of a single element */
int char_size;
/* number of elements currently stored */
int str_len;
/* size in bytes of the buffer pointed to by val */
int alloc_size;
/* element buffer, allocated with malloc()/realloc() */
void * val;
};
#endif /* __DSTRING_PRIVATE_H */
/*
vi:ts=4:ai:expandtab
*/
axe-0.3.1/src/datrie/dstring.c 0000664 0000000 0000000 00000007636 12635670016 0016167 0 ustar 00root root 0000000 0000000 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* libdatrie - Double-Array Trie Library
* Copyright (C) 2006 Theppitak Karoonboonyanan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* dstring.c - Dynamic string type
* Created: 2012-08-01
* Author: Theppitak Karoonboonyanan
*/
#include "dstring.h"
#include "dstring-private.h"
#include "trie-private.h"
#include
#include
/*
 * Create an empty dynamic string whose elements are char_size bytes wide,
 * with an initial buffer of char_size * n_elm bytes.
 * Returns NULL if either allocation fails.
 */
DString *
dstring_new (int char_size, int n_elm)
{
    DString *str = (DString *) malloc (sizeof (DString));

    if (NULL == str)
        return NULL;

    str->alloc_size = char_size * n_elm;
    str->val = malloc (str->alloc_size);
    if (NULL == str->val) {
        free (str);
        return NULL;
    }

    str->char_size = char_size;
    str->str_len = 0;

    return str;
}
/* Release the element buffer, then the string object itself. */
void
dstring_free (DString *ds)
{
    void *buffer = ds->val;

    free (buffer);
    free (ds);
}
/* Return the number of elements currently stored in the string. */
int
dstring_length (const DString *ds)
{
    const int n_elements = ds->str_len;

    return n_elements;
}
/* Return a read-only pointer to the internal element buffer. */
const void *
dstring_get_val (const DString *ds)
{
    const void *buffer = ds->val;

    return buffer;
}
/* Return a writable pointer to the internal element buffer. */
void *
dstring_get_val_rw (DString *ds)
{
    void *buffer = ds->val;

    return buffer;
}
/* Reset the string to zero length; the buffer is kept as it is. */
void
dstring_clear (DString *ds)
{
    ds->str_len = 0;
}
/*
 * Make sure the buffer is at least `size` bytes long. When it is too small
 * it is reallocated to the larger of twice its current size and `size`.
 * Returns false, leaving the string unchanged, if realloc() fails.
 */
static bool
dstring_ensure_space (DString *ds, int size)
{
    int     grown_size;
    void   *grown;

    if (size <= ds->alloc_size)
        return true;

    grown_size = MAX_VAL (ds->alloc_size * 2, size);
    grown = realloc (ds->val, grown_size);
    if (NULL == grown)
        return false;

    ds->val = grown;
    ds->alloc_size = grown_size;

    return true;
}
/*
 * Overwrite dst with the contents of src. str_len + 1 elements are copied
 * from the source buffer, and dst takes over the element size and length
 * of src. Returns false if dst cannot be grown.
 */
bool
dstring_copy (DString *dst, const DString *src)
{
    const int n_bytes = (src->str_len + 1) * src->char_size;

    if (!dstring_ensure_space (dst, n_bytes))
        return false;

    memcpy (dst->val, src->val, n_bytes);

    dst->char_size = src->char_size;
    dst->str_len = src->str_len;

    return true;
}
/*
 * Append src to the end of dst. Both strings must use the same element
 * size. str_len + 1 elements are copied from the source buffer.
 * Returns false on element-size mismatch or if dst cannot be grown.
 */
bool
dstring_append (DString *dst, const DString *src)
{
    char   *write_at;
    int     needed;

    if (src->char_size != dst->char_size)
        return false;

    needed = (dst->str_len + src->str_len + 1) * dst->char_size;
    if (!dstring_ensure_space (dst, needed))
        return false;

    /* computed after the buffer may have moved */
    write_at = (char *) dst->val + (dst->char_size * dst->str_len);
    memcpy (write_at, src->val, (src->str_len + 1) * dst->char_size);

    dst->str_len += src->str_len;

    return true;
}
/*
 * Append `len` elements read from `data` to the end of ds. Space for one
 * additional element is reserved, but nothing is written into it.
 * Returns false if ds cannot be grown.
 */
bool
dstring_append_string (DString *ds, const void *data, int len)
{
    char   *write_at;

    if (!dstring_ensure_space (ds, (ds->str_len + len + 1) * ds->char_size))
        return false;

    write_at = (char *) ds->val + (ds->char_size * ds->str_len);
    memcpy (write_at, data, ds->char_size * len);

    ds->str_len += len;

    return true;
}
/*
 * Append the single element pointed to by `data` (char_size bytes) to the
 * end of ds. Returns false if ds cannot be grown.
 */
bool
dstring_append_char (DString *ds, const void *data)
{
    char   *write_at;

    if (!dstring_ensure_space (ds, (ds->str_len + 2) * ds->char_size))
        return false;

    write_at = (char *) ds->val + (ds->char_size * ds->str_len);
    memcpy (write_at, data, ds->char_size);

    ds->str_len++;

    return true;
}
/*
 * Write one all-zero element just past the current end of ds. The length
 * is not changed. Returns false if ds cannot be grown.
 */
bool
dstring_terminate (DString *ds)
{
    char   *end;

    if (!dstring_ensure_space (ds, (ds->str_len + 2) * ds->char_size))
        return false;

    end = (char *) ds->val + (ds->char_size * ds->str_len);
    memset (end, 0, ds->char_size);

    return true;
}
/*
 * Drop the last element of ds by decrementing its length.
 * Returns false when the string is already empty.
 */
bool
dstring_cut_last (DString *ds)
{
    if (ds->str_len != 0) {
        ds->str_len--;
        return true;
    }

    return false;
}
/*
vi:ts=4:ai:expandtab
*/
axe-0.3.1/src/datrie/dstring.h 0000664 0000000 0000000 00000003455 12635670016 0016167 0 ustar 00root root 0000000 0000000 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* libdatrie - Double-Array Trie Library
* Copyright (C) 2006 Theppitak Karoonboonyanan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* dstring.h - Dynamic string type
* Created: 2012-08-01
* Author: Theppitak Karoonboonyanan
*/
#ifndef __DSTRING_H
#define __DSTRING_H
#include "typedefs.h"
typedef struct _DString DString;
DString * dstring_new (int char_size, int n_elm);
void dstring_free (DString *ds);
int dstring_length (const DString *ds);
const void * dstring_get_val (const DString *ds);
void * dstring_get_val_rw (DString *ds);
void dstring_clear (DString *ds);
bool dstring_copy (DString *dst, const DString *src);
bool dstring_append (DString *dst, const DString *src);
bool dstring_append_string (DString *ds, const void *data, int len);
bool dstring_append_char (DString *ds, const void *data);
bool dstring_terminate (DString *ds);
bool dstring_cut_last (DString *ds);
#endif /* __DSTRING_H */
/*
vi:ts=4:ai:expandtab
*/
axe-0.3.1/src/datrie/tail.c 0000664 0000000 0000000 00000022537 12635670016 0015443 0 ustar 00root root 0000000 0000000 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* libdatrie - Double-Array Trie Library
* Copyright (C) 2006 Theppitak Karoonboonyanan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* tail.c - trie tail for keeping suffixes
* Created: 2006-08-15
* Author: Theppitak Karoonboonyanan
*/
#include
#include
#ifndef _MSC_VER /* for SIZE_MAX */
# include
#endif
#include
#include "tail.h"
/*----------------------------------*
* INTERNAL TYPES DECLARATIONS *
*----------------------------------*/
/*-----------------------------------*
* PRIVATE METHODS DECLARATIONS *
*-----------------------------------*/
static TrieIndex tail_alloc_block (Tail *t);
static void tail_free_block (Tail *t, TrieIndex block);
/* ==================== BEGIN IMPLEMENTATION PART ==================== */
/*------------------------------------*
* INTERNAL TYPES IMPLEMENTATIONS *
*------------------------------------*/
/*------------------------------*
* PRIVATE DATA DEFINITIONS *
*------------------------------*/
/* One tail entry: a key suffix plus the data stored for that key. */
typedef struct {
/* index of the next block in the free list; -1 once the block is allocated */
TrieIndex next_free;
/* data for the key; TRIE_DATA_ERROR when the block is freshly allocated or freed */
TrieData data;
/* heap-allocated suffix string, or NULL when none is set */
TrieChar *suffix;
} TailBlock;
/* The tail pool: a growable array of blocks plus a list of freed blocks. */
struct _Tail {
/* number of elements in the tails array */
TrieIndex num_tails;
/* block array, grown with realloc(); NULL while empty */
TailBlock *tails;
/* head of the free list; 0 means there is no free block */
TrieIndex first_free;
};
/*-----------------------------*
* METHODS IMPLEMENTATIONS *
*-----------------------------*/
#define TAIL_SIGNATURE 0xDFFCDFFC
#define TAIL_START_BLOCKNO 1
/* Tail Header:
* INT32: signature
* INT32: pointer to first free slot
* INT32: number of tail blocks
*
* Tail Blocks:
* INT32: pointer to next free block (-1 for allocated blocks)
* INT32: data for the key
* INT16: length
* BYTES[length]: suffix string (no terminating '\0')
*/
/**
 * @brief Create a new tail object
 *
 * @return the new, empty tail object; NULL if memory allocation fails
 *
 * The returned object has no blocks and an empty free list.
 */
Tail *
tail_new (void)
{
    Tail *fresh = (Tail *) malloc (sizeof (Tail));

    if (NULL == fresh)
        return NULL;

    fresh->tails = NULL;
    fresh->num_tails = 0;
    fresh->first_free = 0;

    return fresh;
}
/**
 * @brief Free tail data
 *
 * @param t : the tail data
 *
 * Release every stored suffix, the block array, and finally the tail
 * object itself.
 */
void
tail_free (Tail *t)
{
    if (NULL != t->tails) {
        TrieIndex   n;

        /* free() accepts NULL, so unset suffixes need no special case */
        for (n = 0; n < t->num_tails; n++)
            free (t->tails[n].suffix);

        free (t->tails);
    }

    free (t);
}
/**
 * @brief Get suffix
 *
 * @param t     : the tail data
 * @param index : the index of the suffix
 *
 * @return pointer to the indexed suffix string; NULL when @a index lies
 *         beyond the last block
 *
 * The returned pointer refers to internal storage: treat it as read-only
 * and do not free() it.
 */
const TrieChar *
tail_get_suffix (const Tail *t, TrieIndex index)
{
    /* external indices start at TAIL_START_BLOCKNO */
    const TrieIndex slot = index - TAIL_START_BLOCKNO;

    if (slot >= t->num_tails)
        return NULL;

    return t->tails[slot].suffix;
}
/**
 * @brief Set suffix of existing entry
 *
 * @param t      : the tail data
 * @param index  : the index of the suffix
 * @param suffix : the new suffix, or NULL to clear the entry's suffix
 *
 * @return true on success; false when @a index lies beyond the last block
 *         or when the copy of @a suffix cannot be allocated
 *
 * Set suffix of existing entry of given @a index in tail. The entry stores
 * its own copy of @a suffix. On failure the previous suffix is left in
 * place.
 */
bool
tail_set_suffix (Tail *t, TrieIndex index, const TrieChar *suffix)
{
    TrieChar   *copy = NULL;

    index -= TAIL_START_BLOCKNO;
    if (index >= t->num_tails)
        return false;

    /* suffix and t->tails[index].suffix may overlap;
     * so, dup it before the old one is released
     */
    if (suffix) {
        copy = (TrieChar *) strdup ((const char *) suffix);
        /* do not destroy the old suffix if the copy could not be made */
        if (!copy)
            return false;
    }

    free (t->tails[index].suffix);
    t->tails[index].suffix = copy;

    return true;
}
/**
 * @brief Add a new suffix
 *
 * @param t      : the tail data
 * @param suffix : the new suffix
 *
 * @return the index of the newly added suffix, or TRIE_INDEX_ERROR if no
 *         block could be allocated
 *
 * Add a new suffix entry to tail.
 */
TrieIndex
tail_add_suffix (Tail *t, const TrieChar *suffix)
{
    TrieIndex   new_block;

    new_block = tail_alloc_block (t);
    /* never index the block array with the failure sentinel */
    if (TRIE_INDEX_ERROR == new_block)
        return TRIE_INDEX_ERROR;

    tail_set_suffix (t, new_block, suffix);

    return new_block;
}

/*
 * Obtain an unused block, reusing the head of the free list when there is
 * one and growing the block array by one element otherwise. The block is
 * reset (next_free = -1, data = TRIE_DATA_ERROR, suffix = NULL).
 *
 * Returns the block number offset by TAIL_START_BLOCKNO, or
 * TRIE_INDEX_ERROR when the array cannot be grown; in that case the tail
 * is left unchanged.
 */
static TrieIndex
tail_alloc_block (Tail *t)
{
    TrieIndex   block;

    if (0 != t->first_free) {
        block = t->first_free;
        t->first_free = t->tails[block].next_free;
    } else {
        TailBlock  *grown;

        /* keep the old array and the old count until realloc() succeeds */
        grown = (TailBlock *) realloc (t->tails,
                                       (t->num_tails + 1) * sizeof (TailBlock));
        if (!grown)
            return TRIE_INDEX_ERROR;

        block = t->num_tails;
        t->tails = grown;
        ++t->num_tails;
    }

    t->tails[block].next_free = -1;
    t->tails[block].data = TRIE_DATA_ERROR;
    t->tails[block].suffix = NULL;

    return block + TAIL_START_BLOCKNO;
}
/*
 * Release the block numbered `block` (offset by TAIL_START_BLOCKNO): reset
 * its data, free its suffix, and link it into the free list in front of
 * the first listed block whose index is not below it. Block numbers beyond
 * the end of the array are ignored.
 */
static void
tail_free_block (Tail *t, TrieIndex block)
{
    TrieIndex   prev;
    TrieIndex   cur;

    block -= TAIL_START_BLOCKNO;

    if (block >= t->num_tails)
        return;

    t->tails[block].data = TRIE_DATA_ERROR;
    /* free(NULL) is a no-op, so no need to test the pointer first */
    free (t->tails[block].suffix);
    t->tails[block].suffix = NULL;

    /* find insertion point; 0 terminates the list */
    prev = 0;
    cur = t->first_free;
    while (cur != 0 && cur < block) {
        prev = cur;
        cur = t->tails[cur].next_free;
    }

    /* insert free block between prev and cur */
    t->tails[block].next_free = cur;
    if (0 == prev)
        t->first_free = block;
    else
        t->tails[prev].next_free = block;
}
/**
 * @brief Get data associated to suffix entry
 *
 * @param t     : the tail data
 * @param index : the index of the suffix
 *
 * @return the data stored in entry @a index; TRIE_DATA_ERROR when
 *         @a index lies beyond the last block
 */
TrieData
tail_get_data (const Tail *t, TrieIndex index)
{
    /* external indices start at TAIL_START_BLOCKNO */
    const TrieIndex slot = index - TAIL_START_BLOCKNO;

    if (slot >= t->num_tails)
        return TRIE_DATA_ERROR;

    return t->tails[slot].data;
}
/**
 * @brief Set data associated to suffix entry
 *
 * @param t     : the tail data
 * @param index : the index of the suffix
 * @param data  : the data to set
 *
 * @return true if the data was stored; false when @a index lies beyond
 *         the last block
 */
bool
tail_set_data (Tail *t, TrieIndex index, TrieData data)
{
    /* external indices start at TAIL_START_BLOCKNO */
    const TrieIndex slot = index - TAIL_START_BLOCKNO;

    if (slot >= t->num_tails)
        return false;

    t->tails[slot].data = data;
    return true;
}
/**
 * @brief Delete suffix entry
 *
 * @param t     : the tail data
 * @param index : the index of the suffix to delete
 *
 * Public entry point for releasing a tail entry; the work is done by
 * tail_free_block().
 */
void
tail_delete (Tail *t, TrieIndex index)
{
    const TrieIndex doomed = index;

    tail_free_block (t, doomed);
}
/**
 * @brief Walk in tail with a string
 *
 * @param t          : the tail data
 * @param s          : the tail data index
 * @param suffix_idx : pointer to current character index in suffix
 * @param str        : the string to use in walking
 * @param len        : total characters in @a str to walk
 *
 * @return total number of characters successfully walked; 0 when entry
 *         @a s has no suffix
 *
 * Compare up to @a len characters of @a str against the suffix of entry
 * @a s, starting at position @a *suffix_idx. The walk ends at the first
 * mismatch or once the suffix terminator has been matched; the position
 * never moves past the terminator. On return, @a *suffix_idx holds the
 * position reached, unless the entry has no suffix, in which case it is
 * left untouched.
 */
int
tail_walk_str  (const Tail      *t,
                TrieIndex        s,
                short           *suffix_idx,
                const TrieChar  *str,
                int              len)
{
    const TrieChar *suffix = tail_get_suffix (t, s);
    short           pos;
    int             walked;

    if (NULL == suffix)
        return 0;

    pos = *suffix_idx;
    for (walked = 0; walked < len; ) {
        if (suffix[pos] != str[walked])
            break;
        walked++;
        /* stop and stay at null-terminator */
        if (suffix[pos] == 0)
            break;
        pos++;
    }

    *suffix_idx = pos;
    return walked;
}
/**
 * @brief Walk in tail with a character
 *
 * @param t          : the tail data
 * @param s          : the tail data index
 * @param suffix_idx : pointer to current character index in suffix
 * @param c          : the character to use in walking
 *
 * @return true if @a c matches the suffix character at @a *suffix_idx;
 *         false otherwise, or when entry @a s has no suffix
 *
 * On a match, @a *suffix_idx is advanced by one unless the matched
 * character is the terminator, in which case the position stays where it
 * is. On failure @a *suffix_idx is left unchanged.
 */
bool
tail_walk_char (const Tail  *t,
                TrieIndex    s,
                short       *suffix_idx,
                TrieChar     c)
{
    const TrieChar *suffix = tail_get_suffix (t, s);
    TrieChar        here;

    if (NULL == suffix)
        return false;

    here = suffix[*suffix_idx];
    if (here != c)
        return false;

    /* never step past the terminator */
    if (here != 0)
        ++*suffix_idx;

    return true;
}
/*
vi:ts=4:ai:expandtab
*/
axe-0.3.1/src/datrie/tail.h 0000664 0000000 0000000 00000005626 12635670016 0015450 0 ustar 00root root 0000000 0000000 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* libdatrie - Double-Array Trie Library
* Copyright (C) 2006 Theppitak Karoonboonyanan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* tail.h - trie tail for keeping suffixes
* Created: 2006-08-12
* Author: Theppitak Karoonboonyanan
*/
#ifndef __TAIL_H
#define __TAIL_H
#include "triedefs.h"
/**
* @file tail.h
* @brief trie tail for keeping suffixes
*/
/**
* @brief Tail structure type
*/
typedef struct _Tail Tail;
Tail * tail_new (void);
void tail_free (Tail *t);
const TrieChar * tail_get_suffix (const Tail *t, TrieIndex index);
bool tail_set_suffix (Tail *t, TrieIndex index, const TrieChar *suffix);
TrieIndex tail_add_suffix (Tail *t, const TrieChar *suffix);
TrieData tail_get_data (const Tail *t, TrieIndex index);
bool tail_set_data (Tail *t, TrieIndex index, TrieData data);
void tail_delete (Tail *t, TrieIndex index);
int tail_walk_str (const Tail *t,
TrieIndex s,
short *suffix_idx,
const TrieChar *str,
int len);
bool tail_walk_char (const Tail *t,
TrieIndex s,
short *suffix_idx,
TrieChar c);
/**
* @brief Test walkability in tail with a character
*
* @param t : the tail data
* @param s : the tail data index
* @param suffix_idx : current character index in suffix
* @param c : the character to test walkability
*
* @return boolean indicating walkability
*
* Test if the character @a c can be used to walk from given character
* position @a suffix_idx of entry @a s of the tail data @a t.
*
* This is a macro that indexes the result of tail_get_suffix() directly,
* without testing it for NULL, so @a s must refer to an entry that has a
* suffix.
*/
/*
bool tail_is_walkable_char (Tail *t,
TrieIndex s,
short suffix_idx,
const TrieChar c);
*/
#define tail_is_walkable_char(t,s,suffix_idx,c) \
(tail_get_suffix ((t), (s)) [suffix_idx] == (c))
#endif /* __TAIL_H */
/*
vi:ts=4:ai:expandtab
*/
axe-0.3.1/src/datrie/trie-private.h 0000664 0000000 0000000 00000002701 12635670016 0017121 0 ustar 00root root 0000000 0000000 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* libdatrie - Double-Array Trie Library
* Copyright (C) 2006 Theppitak Karoonboonyanan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* trie-private.h - Private utilities for trie implementation
* Created: 2007-08-25
* Author: Theppitak Karoonboonyanan
*/
#ifndef __TRIE_PRIVATE_H
#define __TRIE_PRIVATE_H
#include
/**
* @file trie-private.h
* @brief Private utilities for trie implementation
*/
/**
* @brief Minimum value macro
*
* Expands to the smaller of @a a and @a b. The selected argument is
* evaluated twice, so avoid arguments with side effects.
*/
#define MIN_VAL(a,b) ((a)<(b)?(a):(b))
/**
* @brief Maximum value macro
*
* Expands to the larger of @a a and @a b. The selected argument is
* evaluated twice, so avoid arguments with side effects.
*/
#define MAX_VAL(a,b) ((a)>(b)?(a):(b))
#endif /* __TRIE_PRIVATE_H */
/*
vi:ts=4:ai:expandtab
*/
axe-0.3.1/src/datrie/trie-string.c 0000664 0000000 0000000 00000005033 12635670016 0016751 0 ustar 00root root 0000000 0000000 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* libdatrie - Double-Array Trie Library
* Copyright (C) 2006 Theppitak Karoonboonyanan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* trie-string.c - Dynamic string type for Trie alphabets
* Created: 2012-08-02
* Author: Theppitak Karoonboonyanan
*/
#include "trie-string.h"
#include "dstring-private.h"
#include "triedefs.h"
#include
/*
* TrieString wraps a DString as its only member. The trie_string_*
* functions in this file cast between TrieString * and DString *.
*/
struct _TrieString {
DString ds;
};
TrieString *
trie_string_new (int n_elm)
{
return (TrieString *) dstring_new (sizeof (TrieChar), n_elm);
}
/* Destroy a trie string through dstring_free(). */
void
trie_string_free (TrieString *ts)
{
    DString *ds = (DString *) ts;

    dstring_free (ds);
}
int
trie_string_length (const TrieString *ts)
{
return dstring_length ((DString *)ts);
}
const void *
trie_string_get_val (const TrieString *ts)
{
return dstring_get_val ((DString *)ts);
}
/* Return a writable pointer to the character buffer. */
void *
trie_string_get_val_rw (TrieString *ts)
{
    DString *ds = (DString *) ts;

    return dstring_get_val_rw (ds);
}
/* Empty the trie string through dstring_clear(). */
void
trie_string_clear (TrieString *ts)
{
    DString *ds = (DString *) ts;

    dstring_clear (ds);
}
/* Copy src into dst; returns the result of dstring_copy(). */
bool
trie_string_copy (TrieString *dst, const TrieString *src)
{
    DString        *to   = (DString *) dst;
    const DString  *from = (const DString *) src;

    return dstring_copy (to, from);
}
/* Append src to dst; returns the result of dstring_append(). */
bool
trie_string_append (TrieString *dst, const TrieString *src)
{
    DString        *to   = (DString *) dst;
    const DString  *from = (const DString *) src;

    return dstring_append (to, from);
}
/*
 * Append the NUL-terminated TrieChar string `str` to ts. Its length is
 * measured with strlen(). Returns the result of dstring_append_string().
 */
bool
trie_string_append_string (TrieString *ts, const TrieChar *str)
{
    DString *ds = (DString *) ts;

    return dstring_append_string (ds, str, strlen ((const char *) str));
}
/* Append the single character tc to ts; returns the result of dstring_append_char(). */
bool
trie_string_append_char (TrieString *ts, TrieChar tc)
{
    DString *ds = (DString *) ts;

    return dstring_append_char (ds, &tc);
}
/* Write a terminator after the last character; returns the result of dstring_terminate(). */
bool
trie_string_terminate (TrieString *ts)
{
    DString *ds = (DString *) ts;

    return dstring_terminate (ds);
}
/* Remove the last character; returns the result of dstring_cut_last(). */
bool
trie_string_cut_last (TrieString *ts)
{
    DString *ds = (DString *) ts;

    return dstring_cut_last (ds);
}
/*
vi:ts=4:ai:expandtab
*/
axe-0.3.1/src/datrie/trie-string.h 0000664 0000000 0000000 00000003655 12635670016 0016766 0 ustar 00root root 0000000 0000000 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* libdatrie - Double-Array Trie Library
* Copyright (C) 2006 Theppitak Karoonboonyanan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* trie-string.h - Dynamic string type for Trie alphabets
* Created: 2012-08-02
* Author: Theppitak Karoonboonyanan
*/
#ifndef __TRIE_STRING_H
#define __TRIE_STRING_H
#include "dstring.h"
#include "triedefs.h"
typedef struct _TrieString TrieString;
TrieString * trie_string_new (int n_elm);
void trie_string_free (TrieString *ts);
int trie_string_length (const TrieString *ts);
const void * trie_string_get_val (const TrieString *ts);
void * trie_string_get_val_rw (TrieString *ts);
void trie_string_clear (TrieString *ts);
bool trie_string_copy (TrieString *dst, const TrieString *src);
bool trie_string_append (TrieString *dst, const TrieString *src);
bool trie_string_append_string (TrieString *ts, const TrieChar *str);
bool trie_string_append_char (TrieString *ts, TrieChar tc);
bool trie_string_terminate (TrieString *ts);
bool trie_string_cut_last (TrieString *ts);
#endif /* __TRIE_STRING_H */
/*
vi:ts=4:ai:expandtab
*/
axe-0.3.1/src/datrie/trie.c 0000664 0000000 0000000 00000061414 12635670016 0015452 0 ustar 00root root 0000000 0000000 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* libdatrie - Double-Array Trie Library
* Copyright (C) 2006 Theppitak Karoonboonyanan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* trie.c - Trie data type and functions
* Created: 2006-08-11
* Author: Theppitak Karoonboonyanan
*/
#include
#include
#include "trie.h"
#include "alpha-map.h"
#include "alpha-map-private.h"
#include "darray.h"
#include "tail.h"
#include "trie-string.h"
/**
* @brief Trie structure
*
* All three component objects are owned by the trie: trie_new() creates
* them and trie_free() releases them.
*/
struct _Trie {
/* private clone of the alphabet map passed to trie_new() */
AlphaMap *alpha_map;
/* double-array structure, created with da_new() */
DArray *da;
/* tail pool, created with tail_new() */
Tail *tail;
/* pending-changes flag reported by trie_is_dirty(); true for a new trie */
bool is_dirty;
};
/**
* @brief TrieState structure
*/
struct _TrieState {
const Trie *trie; /**< the corresponding trie */
TrieIndex index; /**< index in double-array/tail structures */
short suffix_idx; /**< suffix character offset, if in suffix */
short is_suffix; /**< whether it is currently in suffix part */
};
/**
* @brief TrieIterator structure
*/
struct _TrieIterator {
const TrieState *root; /**< the state to start iteration from */
TrieState *state; /**< the current state */
TrieString *key; /**< buffer for calculating the entry key */
};
/*------------------------*
* INTERNAL FUNCTIONS *
*------------------------*/
#define trie_da_is_separate(da,s) (da_get_base ((da), (s)) < 0)
#define trie_da_get_tail_index(da,s) (-da_get_base ((da), (s)))
#define trie_da_set_tail_index(da,s,v) (da_set_base ((da), (s), -(v)))
/* Allocate a TrieState initialised with the given field values. */
static TrieState * trie_state_new (const Trie *trie,
TrieIndex index,
short suffix_idx,
short is_suffix);
/* Common body of trie_store() and trie_store_if_absent(). */
static bool trie_store_conditionally (Trie *trie,
const AlphaChar *key,
TrieData data,
bool is_overwrite);
/* Add a new key whose path leaves the trie at a double-array node. */
static bool trie_branch_in_branch (Trie *trie,
TrieIndex sep_node,
const TrieChar *suffix,
TrieData data);
/* Add a new key whose path leaves the trie inside an existing tail suffix. */
static bool trie_branch_in_tail (Trie *trie,
TrieIndex sep_node,
const TrieChar *suffix,
TrieData data);
/*-----------------------*
* GENERAL FUNCTIONS *
*-----------------------*/
/**
* @brief Create a new trie
*
* @param alpha_map : the alphabet set for the trie
*
* @return a pointer to the newly created trie, NULL on failure
*
* Create a new empty trie object based on the given @a alpha_map alphabet
* set. The trie contents can then be added and deleted with trie_store() and
* trie_delete() respectively.
*
* The created object must be freed with trie_free().
*/
Trie *
trie_new (const AlphaMap *alpha_map)
{
Trie *trie;
trie = (Trie *) malloc (sizeof (Trie));
if (!trie)
return NULL;
trie->alpha_map = alpha_map_clone (alpha_map);
if (!trie->alpha_map)
goto exit_trie_created;
trie->da = da_new ();
if (!trie->da)
goto exit_alpha_map_created;
trie->tail = tail_new ();
if (!trie->tail)
goto exit_da_created;
trie->is_dirty = true;
return trie;
exit_da_created:
da_free (trie->da);
exit_alpha_map_created:
alpha_map_free (trie->alpha_map);
exit_trie_created:
free (trie);
return NULL;
}
/**
 * @brief Free a trie object
 *
 * @param trie : the trie object to free (may be NULL)
 *
 * Destruct the @a trie and free its allocated memory: the alphabet map,
 * the double-array, the tail pool and finally the trie structure itself.
 * Passing NULL is a no-op, mirroring free().
 */
void
trie_free (Trie *trie)
{
    /* tolerate NULL so that error paths can call this unconditionally */
    if (!trie)
        return;

    alpha_map_free (trie->alpha_map);
    da_free (trie->da);
    tail_free (trie->tail);
    free (trie);
}
/**
* @brief Check pending changes
*
* @param trie : the trie object
*
* @return true if there are pending changes, false otherwise
*
* Check if the @a trie is dirty with some pending changes and needs saving
* to synchronize with the file.
*/
bool
trie_is_dirty (const Trie *trie)
{
    /* simply report the flag maintained by the creation/store/delete paths */
    const bool has_pending_changes = trie->is_dirty;

    return has_pending_changes;
}
/*------------------------------*
* GENERAL QUERY OPERATIONS *
*------------------------------*/
/**
* @brief Retrieve an entry from trie
*
* @param trie : the trie
* @param key : the key for the entry to retrieve
* @param o_data : the storage for storing the entry data on return
*
* @return boolean value indicating the existence of the entry.
*
* Retrieve an entry for the given @a key from @a trie. On return,
* if @a key is found and @a o_data is not NULL, @a *o_data is set
* to the data associated to @a key.
*/
bool
trie_retrieve (const Trie *trie, const AlphaChar *key, TrieData *o_data)
{
    const AlphaChar *cur = key;
    TrieIndex        node = da_get_root (trie->da);
    TrieIndex        tail_idx;
    short            tail_pos = 0;

    /* Phase 1: follow the double-array until a separate node is reached. */
    while (!trie_da_is_separate (trie->da, node)) {
        const TrieIndex mapped = alpha_map_char_to_trie (trie->alpha_map,
                                                         *cur);

        if (TRIE_INDEX_MAX == mapped
            || !da_walk (trie->da, &node, (TrieChar) mapped))
        {
            return false;
        }
        /* the terminator has just been consumed: stay on it */
        if (0 == *cur)
            break;
        cur++;
    }

    /* Phase 2: match the remaining characters against the tail suffix. */
    tail_idx = trie_da_get_tail_index (trie->da, node);
    for (;;) {
        const TrieIndex mapped = alpha_map_char_to_trie (trie->alpha_map,
                                                         *cur);

        if (TRIE_INDEX_MAX == mapped
            || !tail_walk_char (trie->tail, tail_idx, &tail_pos,
                                (TrieChar) mapped))
        {
            return false;
        }
        if (0 == *cur)
            break;
        cur++;
    }

    /* Key matched completely; hand back its data if the caller wants it. */
    if (NULL != o_data)
        *o_data = tail_get_data (trie->tail, tail_idx);

    return true;
}
/**
* @brief Store a value for an entry to trie
*
* @param trie : the trie
* @param key : the key for the entry to store
* @param data : the data associated to the entry
*
* @return boolean value indicating the success of the operation
*
* Store a @a data for the given @a key in @a trie. If @a key does not
* exist in @a trie, it will be appended. If it does, its current data will
* be overwritten.
*/
bool
trie_store (Trie *trie, const AlphaChar *key, TrieData data)
{
    /* an existing entry for the key gets its data replaced */
    const bool overwrite_existing = true;

    return trie_store_conditionally (trie, key, data, overwrite_existing);
}
/**
* @brief Store a value for an entry to trie only if the key is not present
*
* @param trie : the trie
* @param key : the key for the entry to store
* @param data : the data associated to the entry
*
* @return boolean value indicating the success of the operation
*
* Store a @a data for the given @a key in @a trie. If @a key does not
* exist in @a trie, it will be appended. If it does, the function will
* return failure and the existing value will not be touched.
*
* This can be useful for multi-thread applications, as race condition
* can be avoided.
*
* Available since: 0.2.4
*/
bool
trie_store_if_absent (Trie *trie, const AlphaChar *key, TrieData data)
{
    /* an existing entry for the key is left untouched and reported as failure */
    const bool overwrite_existing = false;

    return trie_store_conditionally (trie, key, data, overwrite_existing);
}
/*
* Shared implementation of trie_store() and trie_store_if_absent().
*
* Walks key through the double-array and then through the tail. If the walk
* fails in the double-array the rest of the key is added with
* trie_branch_in_branch(); if it fails in the tail the tail is split with
* trie_branch_in_tail(). When the whole key already exists, its data is
* replaced only if is_overwrite is true.
*
* Returns true when the key was added or its data overwritten; false when a
* key character cannot be mapped, a helper fails, or the key exists and
* is_overwrite is false.
*/
static bool
trie_store_conditionally (Trie *trie,
const AlphaChar *key,
TrieData data,
bool is_overwrite)
{
TrieIndex s, t;
short suffix_idx;
const AlphaChar *p, *sep;
/* walk through branches */
s = da_get_root (trie->da);
for (p = key; !trie_da_is_separate (trie->da, s); p++) {
TrieIndex tc = alpha_map_char_to_trie (trie->alpha_map, *p);
/* TRIE_INDEX_MAX is treated as "character cannot be mapped" */
if (TRIE_INDEX_MAX == tc)
return false;
if (!da_walk (trie->da, &s, (TrieChar) tc)) {
/* no transition for this character: branch off at node s, using the
* rest of the key (from p on) converted to trie characters */
TrieChar *key_str;
bool res;
key_str = alpha_map_char_to_trie_str (trie->alpha_map, p);
if (!key_str)
return false;
res = trie_branch_in_branch (trie, s, key_str, data);
free (key_str);
return res;
}
/* terminator consumed; leave p on it for the tail phase */
if (0 == *p)
break;
}
/* walk through tail */
/* sep remembers where the tail part of the key begins, so that the whole
* tail part can be re-inserted if the tail has to be split */
sep = p;
t = trie_da_get_tail_index (trie->da, s);
suffix_idx = 0;
for ( ; ; p++) {
TrieIndex tc = alpha_map_char_to_trie (trie->alpha_map, *p);
if (TRIE_INDEX_MAX == tc)
return false;
if (!tail_walk_char (trie->tail, t, &suffix_idx, (TrieChar) tc)) {
/* key diverges from the stored suffix: split the tail at node s */
TrieChar *tail_str;
bool res;
tail_str = alpha_map_char_to_trie_str (trie->alpha_map, sep);
if (!tail_str)
return false;
res = trie_branch_in_tail (trie, s, tail_str, data);
free (tail_str);
return res;
}
if (0 == *p)
break;
}
/* duplicated key, overwrite val if flagged */
if (!is_overwrite) {
return false;
}
tail_set_data (trie->tail, t, data);
trie->is_dirty = true;
return true;
}
/*
 * Attach a new key below double-array node sep_node. The first character of
 * suffix becomes a new double-array transition; whatever follows it is stored
 * as a tail suffix together with data. Returns false if the double-array
 * insertion fails.
 */
static bool
trie_branch_in_branch (Trie           *trie,
                       TrieIndex       sep_node,
                       const TrieChar *suffix,
                       TrieData        data)
{
    const TrieChar *rest = suffix;
    TrieIndex       branch_node;
    TrieIndex       tail_slot;

    branch_node = da_insert_branch (trie->da, sep_node, *rest);
    if (TRIE_INDEX_ERROR == branch_node)
        return false;

    /* the first symbol now lives in the double-array; skip it unless it was
     * the terminator itself */
    if ('\0' != *rest)
        rest++;

    tail_slot = tail_add_suffix (trie->tail, rest);
    tail_set_data (trie->tail, tail_slot, data);
    trie_da_set_tail_index (trie->da, branch_node, tail_slot);

    trie->is_dirty = true;
    return true;
}
/*
* Split the tail attached to sep_node so that a new key can share its prefix.
*
* The characters common to the stored suffix and the new suffix are moved
* into the double-array one node at a time; the old entry keeps the rest of
* its suffix under a new node, and the new key is then added at the split
* point with trie_branch_in_branch().
*
* Returns false if the old suffix cannot be fetched or a double-array
* insertion fails; in the latter case the nodes added so far are pruned and
* sep_node is pointed back at the old tail.
*
* NOTE(review): the common-prefix loop has no terminator check, so it relies
* on the two suffixes differing somewhere; the visible caller only gets here
* after tail_walk_char() has failed, which presumably guarantees that.
*/
static bool
trie_branch_in_tail (Trie *trie,
TrieIndex sep_node,
const TrieChar *suffix,
TrieData data)
{
TrieIndex old_tail, old_da, s;
const TrieChar *old_suffix, *p;
/* adjust separate point in old path */
old_tail = trie_da_get_tail_index (trie->da, sep_node);
old_suffix = tail_get_suffix (trie->tail, old_tail);
if (!old_suffix)
return false;
/* turn every shared character into a double-array transition */
for (p = old_suffix, s = sep_node; *p == *suffix; p++, suffix++) {
TrieIndex t = da_insert_branch (trie->da, s, *p);
if (TRIE_INDEX_ERROR == t)
goto fail;
s = t;
}
/* first differing character of the old key gets its own node */
old_da = da_insert_branch (trie->da, s, *p);
if (TRIE_INDEX_ERROR == old_da)
goto fail;
/* that character is now in the double-array; drop it from the suffix
* unless it is the terminator */
if ('\0' != *p)
++p;
tail_set_suffix (trie->tail, old_tail, p);
trie_da_set_tail_index (trie->da, old_da, old_tail);
/* insert the new branch at the new separate point */
return trie_branch_in_branch (trie, s, suffix, data);
fail:
/* failed, undo previous insertions and return error */
da_prune_upto (trie->da, sep_node, s);
trie_da_set_tail_index (trie->da, sep_node, old_tail);
return false;
}
/**
* @brief Delete an entry from trie
*
* @param trie : the trie
* @param key : the key for the entry to delete
*
* @return boolean value indicating whether the key exists and is removed
*
* Delete an entry for the given @a key from @a trie.
*/
bool
trie_delete (Trie *trie, const AlphaChar *key)
{
    const AlphaChar *cur = key;
    TrieIndex        node = da_get_root (trie->da);
    TrieIndex        tail_idx;
    short            tail_pos = 0;

    /* Phase 1: follow the double-array until a separate node is reached. */
    while (!trie_da_is_separate (trie->da, node)) {
        const TrieIndex mapped = alpha_map_char_to_trie (trie->alpha_map,
                                                         *cur);

        if (TRIE_INDEX_MAX == mapped
            || !da_walk (trie->da, &node, (TrieChar) mapped))
        {
            return false;
        }
        /* the terminator has just been consumed: stay on it */
        if (0 == *cur)
            break;
        cur++;
    }

    /* Phase 2: the rest of the key must match the tail suffix exactly. */
    tail_idx = trie_da_get_tail_index (trie->da, node);
    for (;;) {
        const TrieIndex mapped = alpha_map_char_to_trie (trie->alpha_map,
                                                         *cur);

        if (TRIE_INDEX_MAX == mapped
            || !tail_walk_char (trie->tail, tail_idx, &tail_pos,
                                (TrieChar) mapped))
        {
            return false;
        }
        if (0 == *cur)
            break;
        cur++;
    }

    /* Key found: release its tail entry, detach the node and prune. */
    tail_delete (trie->tail, tail_idx);
    da_set_base (trie->da, node, TRIE_INDEX_ERROR);
    da_prune (trie->da, node);

    trie->is_dirty = true;
    return true;
}
/**
 * @brief Enumerate entries in trie
 *
 * @param trie      : the trie
 * @param enum_func : the callback function to be called on each key
 * @param user_data : user-supplied data to send as an argument to @a enum_func
 *
 * @return boolean value indicating whether all the keys are visited
 *
 * Enumerate all entries in trie. For each entry, the user-supplied
 * @a enum_func callback function is called, with the entry key and data.
 * Returning false from such callback will stop enumeration and return false.
 *
 * False is also returned when the root state or the iterator cannot be
 * created, or when the key of an entry cannot be built; in the latter case
 * the callback is not invoked for that entry and enumeration stops.
 */
bool
trie_enumerate (const Trie *trie, TrieEnumFunc enum_func, void *user_data)
{
    TrieState    *root;
    TrieIterator *iter;
    bool          cont = true;

    root = trie_root (trie);
    if (!root)
        return false;

    iter = trie_iterator_new (root);
    if (!iter) {
        trie_state_free (root);
        return false;
    }

    while (cont && trie_iterator_next (iter)) {
        AlphaChar *key = trie_iterator_get_key (iter);

        /* trie_iterator_get_key() returns NULL on failure; never hand a
         * NULL key to the callback */
        if (!key) {
            cont = false;
            break;
        }

        cont = (*enum_func) (key, trie_iterator_get_data (iter), user_data);
        /* the key string is allocated per entry and owned by this loop */
        free (key);
    }

    trie_iterator_free (iter);
    trie_state_free (root);

    return cont;
}
/*-------------------------------*
* STEPWISE QUERY OPERATIONS *
*-------------------------------*/
/**
* @brief Get root state of a trie
*
* @param trie : the trie
*
* @return the root state of the trie
*
* Get root state of @a trie, for stepwise walking.
*
* The returned state is allocated and must be freed with trie_state_free()
*/
TrieState *
trie_root (const Trie *trie)
{
return trie_state_new (trie, da_get_root (trie->da), 0, false);
}
/*----------------*
* TRIE STATE *
*----------------*/
/*
 * Allocate a trie state and fill in all four fields from the arguments.
 * Returns NULL if the allocation fails.
 */
static TrieState *
trie_state_new (const Trie *trie,
                TrieIndex   index,
                short       suffix_idx,
                short       is_suffix)
{
    TrieState *state = (TrieState *) malloc (sizeof (TrieState));

    if (NULL != state) {
        state->trie       = trie;
        state->index      = index;
        state->suffix_idx = suffix_idx;
        state->is_suffix  = is_suffix;
    }

    return state;
}
/**
* @brief Copy trie state to another
*
* @param dst : the destination state
* @param src : the source state
*
* Copy trie state data from @a src to @a dst. All existing data in @a dst
* is overwritten.
*/
void
trie_state_copy (TrieState *dst, const TrieState *src)
{
    /* A state only holds a borrowed trie pointer plus plain position
     * values, so a member-by-member shallow copy is a complete copy. */
    dst->trie       = src->trie;
    dst->index      = src->index;
    dst->suffix_idx = src->suffix_idx;
    dst->is_suffix  = src->is_suffix;
}
/**
* @brief Clone a trie state
*
* @param s : the state to clone
*
* @return a duplicated instance of @a s, or NULL if allocation fails
*
* Make a copy of trie state.
*
* The returned state is allocated and must be freed with trie_state_free()
*/
TrieState *
trie_state_clone (const TrieState *s)
{
    /* a clone is a new state built from the source's four fields */
    TrieState *copy = trie_state_new (s->trie, s->index,
                                      s->suffix_idx, s->is_suffix);

    return copy;
}
/**
* @brief Free a trie state
*
* @param s : the state to free
*
* Free the trie state.
*/
void
trie_state_free (TrieState *s)
{
    /* a state owns nothing but its own storage */
    if (NULL != s)
        free (s);
}
/**
* @brief Rewind a trie state
*
* @param s : the state to rewind
*
* Put the state at root.
*/
void
trie_state_rewind (TrieState *s)
{
    /* leave suffix mode and go back to the double-array root; suffix_idx
     * is left as it is */
    s->is_suffix = false;
    s->index = da_get_root (s->trie->da);
}
/**
* @brief Walk the trie from the state
*
* @param s : current state
* @param c : key character for walking
*
* @return boolean value indicating the success of the walk
*
* Walk the trie stepwise, using a given character @a c.
* On return, the state @a s is updated to the new state if successfully walked.
*/
bool
trie_state_walk (TrieState *s, AlphaChar c)
{
    const Trie      *owner = s->trie;
    const TrieIndex  mapped = alpha_map_char_to_trie (owner->alpha_map, c);
    bool             walked;

    if (TRIE_INDEX_MAX == mapped)
        return false;

    /* already inside a tail suffix: just advance along it */
    if (s->is_suffix) {
        return tail_walk_char (owner->tail, s->index, &s->suffix_idx,
                               (TrieChar) mapped);
    }

    walked = da_walk (owner->da, &s->index, (TrieChar) mapped);
    if (walked && trie_da_is_separate (owner->da, s->index)) {
        /* landed on a separate node: switch the state over to its tail */
        s->index = trie_da_get_tail_index (owner->da, s->index);
        s->suffix_idx = 0;
        s->is_suffix = true;
    }

    return walked;
}
/**
* @brief Test walkability of character from state
*
* @param s : the state to check
* @param c : the input character
*
* @return boolean indicating walkability
*
* Test if there is a transition from state @a s with input character @a c.
*/
bool
trie_state_is_walkable (const TrieState *s, AlphaChar c)
{
    const Trie      *owner = s->trie;
    const TrieIndex  mapped = alpha_map_char_to_trie (owner->alpha_map, c);

    if (TRIE_INDEX_MAX == mapped)
        return false;

    /* ask whichever structure the state currently lives in */
    if (s->is_suffix) {
        return tail_is_walkable_char (owner->tail, s->index, s->suffix_idx,
                                      (TrieChar) mapped);
    }

    return da_is_walkable (owner->da, s->index, (TrieChar) mapped);
}
/**
 * @brief Get all walkable characters from state
 *
 * @param s          : the state to get
 * @param chars      : the storage for the result
 * @param chars_nelm : the size of @a chars[] in number of elements
 *
 * @return total walkable characters
 *
 * Get the list of all walkable characters from state @a s. At most
 * @a chars_nelm walkable characters are stored in @a chars[] on return;
 * nothing is written when @a chars_nelm is zero or negative.
 *
 * The function returns the actual number of walkable characters from @a s.
 * Note that this may not equal the number of characters stored in @a chars[]
 * if @a chars_nelm is less than the actual number. Zero is returned when the
 * symbol list or the tail suffix of the state cannot be obtained.
 *
 * Available since: 0.2.6
 */
int
trie_state_walkable_chars (const TrieState  *s,
                           AlphaChar         chars[],
                           int               chars_nelm)
{
    int syms_num = 0;

    if (!s->is_suffix) {
        Symbols *syms = da_output_symbols (s->trie->da, s->index);
        int      i;

        /* defensive: do not query a symbol list that was not produced */
        if (!syms)
            return 0;

        syms_num = symbols_num (syms);
        for (i = 0; i < syms_num && i < chars_nelm; i++) {
            TrieChar tc = symbols_get (syms, i);
            chars[i] = alpha_map_trie_to_char (s->trie->alpha_map, tc);
        }

        symbols_free (syms);
    } else {
        const TrieChar *suffix = tail_get_suffix (s->trie->tail, s->index);

        if (!suffix)
            return 0;

        /* inside a suffix there is exactly one way forward; only store it
         * if the caller's buffer has room for it */
        if (chars_nelm > 0) {
            chars[0] = alpha_map_trie_to_char (s->trie->alpha_map,
                                               suffix[s->suffix_idx]);
        }
        syms_num = 1;
    }

    return syms_num;
}
/**
* @brief Check for single path
*
* @param s : the state to check
*
* @return boolean value indicating whether it is in a single path
*
* Check if the given state is in a single path, that is, there is no other
* branch from it to leaf.
*/
bool
trie_state_is_single (const TrieState *s)
{
    /* a state is on a single path exactly when it is inside a tail suffix */
    return 0 != s->is_suffix;
}
/**
* @brief Get data from leaf state
*
* @param s : a leaf state
*
* @return the data associated with the leaf state @a s,
* or TRIE_DATA_ERROR if @a s is not a leaf state
*
* Get value from a leaf state of trie. Getting value from a non-leaf state
* will result in TRIE_DATA_ERROR.
*/
TrieData
trie_state_get_data (const TrieState *s)
{
    /* only a leaf state refers to a tail entry carrying data */
    if (!trie_state_is_leaf (s))
        return TRIE_DATA_ERROR;

    return tail_get_data (s->trie->tail, s->index);
}
/*---------------------*
* ENTRY ITERATION *
*---------------------*/
/**
* @brief Create a new trie iterator
*
* @param s : the TrieState to start iteration from
*
* @return a pointer to the newly created TrieIterator, or NULL on failure
*
* Create a new trie iterator for iterating entries of a sub-trie rooted at
* state @a s.
*
* Use it with the result of trie_root() to iterate the whole trie.
*
* The created object must be freed with trie_iterator_free().
*
* Available since: 0.2.6
*/
TrieIterator *
trie_iterator_new (TrieState *s)
{
    TrieIterator *it = (TrieIterator *) malloc (sizeof (TrieIterator));

    if (NULL != it) {
        /* the starting state is only referenced, not copied; the working
         * state and key buffer stay NULL until the first iteration step */
        it->root  = s;
        it->state = NULL;
        it->key   = NULL;
    }

    return it;
}
/**
 * @brief Free a trie iterator
 *
 * @param iter : the trie iterator to free (may be NULL)
 *
 * Destruct the iterator @a iter and free its allocated memory, i.e. its
 * working state and key buffer if they were created. The state the iterator
 * was created from is not freed. Passing NULL is a no-op, mirroring free().
 *
 * Available since: 0.2.6
 */
void
trie_iterator_free (TrieIterator *iter)
{
    /* tolerate NULL so that error paths can call this unconditionally */
    if (!iter)
        return;

    if (iter->state) {
        trie_state_free (iter->state);
    }
    if (iter->key) {
        trie_string_free (iter->key);
    }
    free (iter);
}
/**
 * @brief Move trie iterator to the next entry
 *
 * @param iter : an iterator
 *
 * @return boolean value indicating the availability of the entry
 *
 * Move trie iterator to the next entry.
 * On return, the iterator @a iter is updated to refer to the new entry
 * if successfully moved.
 *
 * False is returned when there is no further entry, and also when the
 * working state or key buffer needed for the first step cannot be allocated.
 *
 * Available since: 0.2.6
 */
bool
trie_iterator_next (TrieIterator *iter)
{
    TrieState *s = iter->state;
    TrieIndex  sep;

    /* first iteration */
    if (!s) {
        s = iter->state = trie_state_clone (iter->root);
        if (!s)
            return false;

        /* for tail state, we are already at the only entry */
        if (s->is_suffix)
            return true;

        iter->key = trie_string_new (20);
        if (!iter->key) {
            /* drop the half-initialised state so that a later call starts
             * over instead of walking with a missing key buffer */
            trie_state_free (s);
            iter->state = NULL;
            return false;
        }

        sep = da_first_separate (s->trie->da, s->index, iter->key);
        if (TRIE_INDEX_ERROR == sep)
            return false;

        s->index = sep;
        return true;
    }

    /* no next entry for tail state */
    if (s->is_suffix)
        return false;

    /* iter->state is a separate node */
    sep = da_next_separate (s->trie->da, iter->root->index, s->index,
                            iter->key);
    if (TRIE_INDEX_ERROR == sep)
        return false;

    s->index = sep;
    return true;
}
/**
 * @brief Get key for a trie iterator
 *
 * @param iter : an iterator
 *
 * @return the allocated key string; NULL on failure
 *
 * Get key for the current entry referenced by the trie iterator @a iter.
 * The key is the part collected in the iterator's key buffer (if any)
 * followed by the tail suffix of the current entry, converted back to
 * alphabet characters.
 *
 * NULL is returned if the iterator has not been advanced yet, if the tail
 * suffix cannot be fetched, or if memory for the result cannot be allocated.
 *
 * The return string must be freed with free().
 *
 * Available since: 0.2.6
 */
AlphaChar *
trie_iterator_get_key (const TrieIterator *iter)
{
    const TrieState *s;
    const TrieChar  *tail_str;
    AlphaChar       *alpha_key, *alpha_p;

    s = iter->state;
    if (!s)
        return NULL;

    /* if s is in tail, root == s */
    if (s->is_suffix) {
        tail_str = tail_get_suffix (s->trie->tail, s->index);
        if (!tail_str)
            return NULL;

        /* only the part of the suffix not yet walked belongs to the key */
        tail_str += s->suffix_idx;

        alpha_key = (AlphaChar *) malloc (sizeof (AlphaChar)
                                          * (strlen ((const char *)tail_str)
                                             + 1));
        if (!alpha_key)
            return NULL;
        alpha_p = alpha_key;
    } else {
        TrieIndex       tail_idx;
        int             i, key_len;
        const TrieChar *key_p;

        tail_idx = trie_da_get_tail_index (s->trie->da, s->index);
        tail_str = tail_get_suffix (s->trie->tail, tail_idx);
        if (!tail_str)
            return NULL;

        key_len = trie_string_length (iter->key);
        key_p = trie_string_get_val (iter->key);
        alpha_key = (AlphaChar *) malloc (
                        sizeof (AlphaChar)
                        * (key_len + strlen ((const char *)tail_str) + 1)
                    );
        if (!alpha_key)
            return NULL;
        alpha_p = alpha_key;

        /* first the characters gathered while walking the double-array */
        for (i = key_len; i > 0; i--) {
            *alpha_p++ = alpha_map_trie_to_char (s->trie->alpha_map, *key_p++);
        }
    }

    /* then the tail suffix, up to but excluding its terminator */
    while (*tail_str) {
        *alpha_p++ = alpha_map_trie_to_char (s->trie->alpha_map, *tail_str++);
    }
    *alpha_p = 0;

    return alpha_key;
}
/**
* @brief Get data for the entry referenced by an iterator
*
* @param iter : an iterator
*
* @return the data associated with the entry referenced by iterator @a iter,
* or TRIE_DATA_ERROR if @a iter does not reference to a unique entry
*
* Get value for the entry referenced by an iterator. Getting value from an
* un-iterated (or broken for any reason) iterator will result in
* TRIE_DATA_ERROR.
*
* Available since: 0.2.6
*/
TrieData
trie_iterator_get_data (const TrieIterator *iter)
{
const TrieState *s = iter->state;
TrieIndex tail_index;
if (!s)
return TRIE_DATA_ERROR;
if (!s->is_suffix) {
if (!trie_da_is_separate (s->trie->da, s->index))
return TRIE_DATA_ERROR;
tail_index = trie_da_get_tail_index (s->trie->da, s->index);
} else {
tail_index = s->index;
}
return tail_get_data (s->trie->tail, tail_index);
}
/*
vi:ts=4:ai:expandtab
*/
axe-0.3.1/src/datrie/trie.h 0000664 0000000 0000000 00000015000 12635670016 0015445 0 ustar 00root root 0000000 0000000 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* libdatrie - Double-Array Trie Library
* Copyright (C) 2006 Theppitak Karoonboonyanan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* trie.h - Trie data type and functions
* Created: 2006-08-11
* Author: Theppitak Karoonboonyanan
*/
#ifndef __TRIE_H
#define __TRIE_H
#include
#include
#ifdef __cplusplus
extern "C" {
#endif
/**
* @file trie.h
* @brief Trie data type and functions
*
* Trie is a kind of digital search tree, an efficient indexing method with
* O(1) time complexity for searching. Comparably as efficient as hashing,
* trie also provides flexibility on incremental matching and key spelling
* manipulation. This makes it ideal for lexical analyzers, as well as
* spelling dictionaries.
*
* This library is an implementation of double-array structure for representing
* trie, as proposed by Junichi Aoe. The details of the implementation can be
* found at http://linux.thai.net/~thep/datrie/datrie.html
*
* A Trie is associated with an AlphaMap, a map between actual alphabet
* characters and the raw character used to walk through trie.
* You can define the alphabet set by adding ranges of character codes
* to it before associating it to a trie. And the keys to be added to the trie
* must be only in such ranges.
*
* A new Trie can be created in memory using trie_new().
* It can even be embedded in another file using trie_fwrite() and read back
* using trie_fread().
* After use, Trie objects must be freed using trie_free().
*
* Operations on trie include:
*
* - Add/delete entries with trie_store() and trie_delete()
* - Retrieve entries with trie_retrieve()
* - Walk through trie stepwise with TrieState and its functions
* (trie_root(), trie_state_walk(), trie_state_rewind(),
* trie_state_clone(), trie_state_copy(),
* trie_state_is_walkable(), trie_state_walkable_chars(),
* trie_state_is_single(), trie_state_get_data().
* And do not forget to free TrieState objects with trie_state_free()
* after use.)
* - Enumerate all keys using trie_enumerate()
* - Iterate entries using TrieIterator and its functions
* (trie_iterator_new(), trie_iterator_next(), trie_iterator_get_key(),
* trie_iterator_get_data().
* And do not forget to free TrieIterator objects with trie_iterator_free()
* after use.)
*/
/**
* @brief Trie data type
*
* Opaque handle; the structure itself is defined in trie.c.
*/
typedef struct _Trie Trie;
/**
* @brief Trie enumeration function
*
* @param key : the key of the entry
* @param key_data : the data of the entry
* @param user_data : the user-supplied data on enumerate call
*
* @return true to continue enumeration, false to stop
*
* trie_enumerate() frees @a key after the callback returns, so the callback
* must copy the key if it needs to keep it.
*/
typedef bool (*TrieEnumFunc) (const AlphaChar *key,
TrieData key_data,
void *user_data);
/**
* @brief Trie walking state
*
* Opaque handle; obtained from trie_root() or trie_state_clone().
*/
typedef struct _TrieState TrieState;
/**
* @brief Trie iteration state
*
* Opaque handle; obtained from trie_iterator_new().
*/
typedef struct _TrieIterator TrieIterator;
/*-----------------------*
* GENERAL FUNCTIONS *
*-----------------------*/
/* Create an empty trie using a clone of alpha_map; NULL on failure. */
Trie * trie_new (const AlphaMap *alpha_map);
/* Release a trie created with trie_new(). */
void trie_free (Trie *trie);
/* Report the trie's dirty flag. */
bool trie_is_dirty (const Trie *trie);
/*------------------------------*
* GENERAL QUERY OPERATIONS *
*------------------------------*/
/* Look up key; on success optionally store its data in *o_data. */
bool trie_retrieve (const Trie *trie,
const AlphaChar *key,
TrieData *o_data);
/* Add key with data, overwriting the data of an existing key. */
bool trie_store (Trie *trie, const AlphaChar *key, TrieData data);
/* Add key with data; fails without changes if the key already exists. */
bool trie_store_if_absent (Trie *trie, const AlphaChar *key, TrieData data);
/* Remove key; returns whether it existed and was removed. */
bool trie_delete (Trie *trie, const AlphaChar *key);
/* Call enum_func for every entry until it returns false. */
bool trie_enumerate (const Trie *trie,
TrieEnumFunc enum_func,
void *user_data);
/*-------------------------------*
* STEPWISE QUERY OPERATIONS *
*-------------------------------*/
/* Allocate a state positioned at the root; free with trie_state_free(). */
TrieState * trie_root (const Trie *trie);
/*----------------*
* TRIE STATE *
*----------------*/
/* Allocate a copy of s; free with trie_state_free(). */
TrieState * trie_state_clone (const TrieState *s);
/* Overwrite *dst with the contents of *src. */
void trie_state_copy (TrieState *dst, const TrieState *src);
/* Release a state. */
void trie_state_free (TrieState *s);
/* Move the state back to the root. */
void trie_state_rewind (TrieState *s);
/* Advance the state by character c; returns whether the step succeeded. */
bool trie_state_walk (TrieState *s, AlphaChar c);
/* Test whether a step by character c would succeed, without moving. */
bool trie_state_is_walkable (const TrieState *s, AlphaChar c);
/* Store up to chars_nelm walkable characters; returns the total count. */
int trie_state_walkable_chars (const TrieState *s,
AlphaChar chars[],
int chars_nelm);
/**
* @brief Check for terminal state
*
* @param s : the state to check
*
* @return boolean value indicating whether it is a terminal state
*
* Check if the given state is a terminal state. A terminal state is a trie
* state that terminates a key, and stores a value associated with it.
*
* Implemented as a walkability test for TRIE_CHAR_TERM from @a s.
*/
#define trie_state_is_terminal(s) trie_state_is_walkable((s),TRIE_CHAR_TERM)
/* Report whether the state is on a single path (inside a tail suffix). */
bool trie_state_is_single (const TrieState *s);
/**
* @brief Check for leaf state
*
* @param s : the state to check
*
* @return boolean value indicating whether it is a leaf state
*
* Check if the given state is a leaf state. A leaf state is a terminal state
* that has no other branch.
*
* This is a macro that may evaluate @a s twice, so the argument should not
* have side effects.
*/
#define trie_state_is_leaf(s) \
(trie_state_is_single(s) && trie_state_is_terminal(s))
/* Data of a leaf state, or TRIE_DATA_ERROR if s is not a leaf state. */
TrieData trie_state_get_data (const TrieState *s);
/*----------------------*
* ENTRY ITERATION *
*----------------------*/
/* Create an iterator over the entries below state s; NULL on failure. */
TrieIterator * trie_iterator_new (TrieState *s);
/* Release an iterator (the state it was created from is not freed). */
void trie_iterator_free (TrieIterator *iter);
/* Advance to the next entry; false when no entry is available. */
bool trie_iterator_next (TrieIterator *iter);
/* Newly allocated key of the current entry (free with free()), or NULL. */
AlphaChar * trie_iterator_get_key (const TrieIterator *iter);
/* Data of the current entry, or TRIE_DATA_ERROR. */
TrieData trie_iterator_get_data (const TrieIterator *iter);
#ifdef __cplusplus
}
#endif
#endif /* __TRIE_H */
/*
vi:ts=4:ai:expandtab
*/
axe-0.3.1/src/datrie/triedefs.h 0000664 0000000 0000000 00000004212 12635670016 0016312 0 ustar 00root root 0000000 0000000 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* libdatrie - Double-Array Trie Library
* Copyright (C) 2006 Theppitak Karoonboonyanan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* triedefs.h - General typedefs for trie
* Created: 2006-08-11
* Author: Theppitak Karoonboonyanan
*/
#ifndef __TRIEDEFS_H
#define __TRIEDEFS_H
#include
#include
/**
* @file triedefs.h
* @brief General typedefs for trie
*/
/**
 * @brief Alphabet character type for use as input/output strings of trie keys
 */
typedef char AlphaChar;
/**
 * @brief Error value for alphabet character
 *
 * All bits set, expressed in the AlphaChar type itself. The outer cast is
 * required: AlphaChar is narrower than int, so the operand of ~ is promoted
 * and the bare expression ~(AlphaChar)0 would be the int value -1, which
 * never compares equal to an AlphaChar on platforms where plain char is
 * unsigned.
 */
#define ALPHA_CHAR_ERROR ((AlphaChar) ~(AlphaChar) 0)
/**
* @brief Raw character type mapped into packed set from AlphaChar,
* for use in actual trie transition calculations
*/
typedef uint8_t TrieChar;
/**
* @brief Trie terminator character
*/
#define TRIE_CHAR_TERM ((TrieChar) 0)
/**
* @brief Largest value representable in TrieChar
*/
#define TRIE_CHAR_MAX UINT8_MAX
/**
* @brief Type of index into Trie double-array and tail structures
*
* Signed, because trie.c stores tail indices as negated BASE values.
*/
typedef int32_t TrieIndex;
/**
* @brief Trie error index
*/
#define TRIE_INDEX_ERROR ((TrieIndex) 0)
/**
* @brief Maximum trie index value
*
* trie.c also treats this value as the failure result of
* alpha_map_char_to_trie().
*/
#define TRIE_INDEX_MAX INT32_MAX
/**
* @brief Type of value associated to trie entries
*
* Defined as intptr_t, an integer type wide enough to hold a pointer.
*/
typedef intptr_t TrieData;
/**
* @brief Trie error data
*
* Returned by the data getters when no entry is referenced; an entry that
* stores -1 as its data cannot be told apart from this value.
*/
#define TRIE_DATA_ERROR ((TrieData) -1)
#endif /* __TRIEDEFS_H */
/*
vi:ts=4:ai:expandtab
*/
axe-0.3.1/src/datrie/typedefs.h 0000664 0000000 0000000 00000002415 12635670016 0016333 0 ustar 00root root 0000000 0000000 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* libdatrie - Double-Array Trie Library
* Copyright (C) 2006 Theppitak Karoonboonyanan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* typedefs.h - general types
* Created : 11 Aug 2006
* Author : Theppitak Karoonboonyanan
*/
#ifndef __TYPEDEFS_H
#define __TYPEDEFS_H
#include
#include
#include
/* Fixed-width unsigned aliases: 8, 16 and 32 bits respectively. */
typedef uint8_t byte;
typedef uint16_t word;
typedef uint32_t dword;
#endif /* __TYPEDEFS_H */
/*
vi:ts=4:ai:expandtab
*/
axe-0.3.1/src/gsl/ 0000775 0000000 0000000 00000000000 12635670016 0013652 5 ustar 00root root 0000000 0000000 axe-0.3.1/src/gsl/combination.c 0000664 0000000 0000000 00000007003 12635670016 0016320 0 ustar 00root root 0000000 0000000 /* combination/combination.c
* based on permutation/permutation.c by Brian Gough
*
* Copyright (C) 2001 Szymon Jaroszewicz
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "gsl_errno.h"
#include "gsl_combination.h"
/* Return the n field of the combination (the size of the set chosen from). */
size_t
gsl_combination_n (const gsl_combination * c)
{
  const size_t range = c->n;

  return range;
}
/* Return the k field of the combination (the number of chosen elements). */
size_t
gsl_combination_k (const gsl_combination * c)
{
  const size_t chosen = c->k;

  return chosen;
}
/* Return the combination's element array; the storage is not copied. */
size_t *
gsl_combination_data (const gsl_combination * c)
{
  size_t * const elements = c->data;

  return elements;
}
/*
 * Check that c is a well-formed combination: k must not exceed n, every
 * element must be below n, and the elements must be strictly increasing.
 * Each violation is reported through GSL_ERROR with GSL_FAILURE; otherwise
 * GSL_SUCCESS is returned.
 */
int
gsl_combination_valid (gsl_combination * c)
{
  const size_t total = c->n;
  const size_t chosen = c->k;
  size_t cur;

  if (chosen > total)
    {
      GSL_ERROR ("combination has k greater than n", GSL_FAILURE);
    }

  for (cur = 0; cur < chosen; ++cur)
    {
      const size_t value = c->data[cur];
      size_t prev = 0;

      if (value >= total)
        {
          GSL_ERROR ("combination index outside range", GSL_FAILURE);
        }

      /* compare against every earlier element */
      while (prev < cur)
        {
          const size_t earlier = c->data[prev];

          if (earlier == value)
            {
              GSL_ERROR ("duplicate combination index", GSL_FAILURE);
            }
          if (earlier > value)
            {
              GSL_ERROR ("combination indices not in increasing order",
                         GSL_FAILURE);
            }
          ++prev;
        }
    }

  return GSL_SUCCESS;
}
/*
 * Advance c in place to the next combination in lexicographical order.
 * Returns GSL_FAILURE, leaving c unchanged, when k is zero or c is already
 * the last combination; GSL_SUCCESS otherwise.
 */
int
gsl_combination_next (gsl_combination * c)
{
  const size_t total = c->n;
  const size_t count = c->k;
  size_t *elems = c->data;
  size_t pos, fill;

  if (0 == count)
    {
      return GSL_FAILURE;
    }

  /* scan from the right for the first slot not yet at its maximum value */
  for (pos = count - 1; pos > 0; pos--)
    {
      if (elems[pos] != total - count + pos)
        {
          break;
        }
    }

  if (0 == pos && elems[pos] == total - count)
    {
      return GSL_FAILURE;
    }

  /* bump that slot and restart everything to its right just above it */
  elems[pos]++;
  for (fill = pos + 1; fill < count; fill++)
    {
      elems[fill] = elems[fill - 1] + 1;
    }

  return GSL_SUCCESS;
}
/*
 * Step c in place back to the previous combination in lexicographical
 * order. Returns GSL_FAILURE, leaving c unchanged, when k is zero or c is
 * already the first combination; GSL_SUCCESS otherwise.
 */
int
gsl_combination_prev (gsl_combination * c)
{
  const size_t total = c->n;
  const size_t count = c->k;
  size_t *elems = c->data;
  size_t pos, fill;

  if (0 == count)
    {
      return GSL_FAILURE;
    }

  /* scan from the right for the first slot with a gap to its left neighbour */
  for (pos = count - 1; pos > 0; pos--)
    {
      if (elems[pos] != elems[pos - 1] + 1)
        {
          break;
        }
    }

  if (0 == pos && 0 == elems[pos])
    {
      return GSL_FAILURE;
    }

  /* lower that slot and push everything to its right to the maximum */
  elems[pos]--;
  for (fill = pos + 1; fill < count; fill++)
    {
      elems[fill] = total - count + fill;
    }

  return GSL_SUCCESS;
}
/*
 * Copy the elements of src into dest. Both combinations must have the same
 * n and k; otherwise GSL_EBADLEN is reported through GSL_ERROR and nothing
 * is copied.
 */
int
gsl_combination_memcpy (gsl_combination * dest, const gsl_combination * src)
{
  const size_t count = src->k;
  size_t idx = 0;

  if (src->n != dest->n || count != dest->k)
    {
      GSL_ERROR ("combination lengths are not equal", GSL_EBADLEN);
    }

  while (idx < count)
    {
      dest->data[idx] = src->data[idx];
      idx++;
    }

  return GSL_SUCCESS;
}
axe-0.3.1/src/gsl/error.c 0000664 0000000 0000000 00000003761 12635670016 0015156 0 ustar 00root root 0000000 0000000 /* err/error.c
*
* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2007 Gerard Jungman, Brian Gough
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include
#include
#include
#include "gsl_errno.h"
#include "gsl_message.h"
/* Currently installed error handler.  NULL selects the built-in default
 * behaviour of gsl_error(): print the error, then abort(). */
gsl_error_handler_t * gsl_error_handler = NULL;
/* Forward declaration of the do-nothing handler that
 * gsl_set_error_handler_off() installs. */
static void no_error_handler (const char *reason, const char *file, int line, int gsl_errno);
/* Report an error.
 *
 * When a handler has been installed it is called with all four arguments
 * and control returns to the caller.  Otherwise the error is printed via
 * gsl_stream_printf(), a notice is written to stderr, and the process is
 * terminated with abort().
 */
void
gsl_error (const char * reason, const char * file, int line, int gsl_errno)
{
  if (gsl_error_handler == NULL)
    {
      /* Default behaviour: report and terminate. */
      gsl_stream_printf ("ERROR", file, line, reason);
      fflush (stdout);
      fprintf (stderr, "Default GSL error handler invoked.\n");
      fflush (stderr);
      abort ();
    }

  (*gsl_error_handler) (reason, file, line, gsl_errno);
}
/* Install new_handler as the error handler and return the handler that
 * was in place before the call (NULL if the default was active). */
gsl_error_handler_t *
gsl_set_error_handler (gsl_error_handler_t * new_handler)
{
  gsl_error_handler_t * const replaced = gsl_error_handler;

  gsl_error_handler = new_handler;

  return replaced;
}
/* Silence error reporting by installing the do-nothing handler.
 * Returns the handler that was in place before the call. */
gsl_error_handler_t *
gsl_set_error_handler_off (void)
{
  return gsl_set_error_handler (no_error_handler);
}
/* Error handler that deliberately ignores every error. */
static void
no_error_handler (const char *reason, const char *file, int line, int gsl_errno)
{
  /* The casts only mark the parameters as intentionally unused. */
  (void) gsl_errno;
  (void) line;
  (void) file;
  (void) reason;
}
axe-0.3.1/src/gsl/gsl_combination.h 0000664 0000000 0000000 00000004642 12635670016 0017200 0 ustar 00root root 0000000 0000000 /* combination/gsl_combination.h
* based on permutation/gsl_permutation.h by Brian Gough
*
* Copyright (C) 2001 Szymon Jaroszewicz
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __GSL_COMBINATION_H__
#define __GSL_COMBINATION_H__
#include
#include "gsl_errno.h"
#undef __BEGIN_DECLS
#undef __END_DECLS
#ifdef __cplusplus
# define __BEGIN_DECLS extern "C" {
# define __END_DECLS }
#else
# define __BEGIN_DECLS /* empty */
# define __END_DECLS /* empty */
#endif
__BEGIN_DECLS
/* A combination of k indices chosen from the n values 0 .. n-1.
 * data points at k elements; gsl_combination_alloc() leaves it NULL
 * when k is 0. */
struct gsl_combination_struct
{
size_t n;
size_t k;
size_t *data;
};
typedef struct gsl_combination_struct gsl_combination;
/* Allocation: _alloc leaves the elements uninitialised, _calloc fills
 * them with 0 .. k-1.  Both return NULL (0) on failure. */
gsl_combination *gsl_combination_alloc (const size_t n, const size_t k);
gsl_combination *gsl_combination_calloc (const size_t n, const size_t k);
/* Reset to the first (0 .. k-1) or last (n-k .. n-1) combination. */
void gsl_combination_init_first (gsl_combination * c);
void gsl_combination_init_last (gsl_combination * c);
/* Release a combination; a NULL argument is accepted. */
void gsl_combination_free (gsl_combination * c);
/* Copy src into dest; fails with GSL_EBADLEN when n or k differ. */
int gsl_combination_memcpy (gsl_combination * dest, const gsl_combination * src);
/* Stream I/O and accessor declarations.
 * NOTE(review): no definitions for these appear alongside the other
 * combination sources here - confirm they are provided before use. */
int gsl_combination_fread (FILE * stream, gsl_combination * c);
int gsl_combination_fwrite (FILE * stream, const gsl_combination * c);
int gsl_combination_fscanf (FILE * stream, gsl_combination * c);
int gsl_combination_fprintf (FILE * stream, const gsl_combination * c, const char *format);
size_t gsl_combination_n (const gsl_combination * c);
size_t gsl_combination_k (const gsl_combination * c);
size_t * gsl_combination_data (const gsl_combination * c);
int gsl_combination_valid (gsl_combination * c);
/* Advance to the next / step back to the previous combination in
 * lexicographic order; GSL_FAILURE when there is none. */
int gsl_combination_next (gsl_combination * c);
int gsl_combination_prev (gsl_combination * c);
/* Return element i of the combination.  No bounds check is performed,
 * so the caller must ensure i is a valid index into c->data. */
static inline size_t
gsl_combination_get (const gsl_combination * c, const size_t i)
{
  const size_t *elems = c->data;

  return elems[i];
}
__END_DECLS
#endif /* __GSL_COMBINATION_H__ */
axe-0.3.1/src/gsl/gsl_errno.h 0000664 0000000 0000000 00000013452 12635670016 0016022 0 ustar 00root root 0000000 0000000 /* err/gsl_errno.h
*
* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2007 Gerard Jungman, Brian Gough
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __GSL_ERRNO_H__
#define __GSL_ERRNO_H__
#include
#include
#undef __BEGIN_DECLS
#undef __END_DECLS
#ifdef __cplusplus
# define __BEGIN_DECLS extern "C" {
# define __END_DECLS }
#else
# define __BEGIN_DECLS /* empty */
# define __END_DECLS /* empty */
#endif
__BEGIN_DECLS
/* Status codes used throughout the library.  GSL_SUCCESS is zero,
 * GSL_FAILURE and GSL_CONTINUE are negative, and the specific error
 * codes run consecutively from GSL_EDOM (1) to GSL_EOF (32). */
enum {
GSL_SUCCESS = 0,
GSL_FAILURE = -1,
GSL_CONTINUE = -2, /* iteration has not converged */
GSL_EDOM = 1, /* input domain error, e.g sqrt(-1) */
GSL_ERANGE = 2, /* output range error, e.g. exp(1e100) */
GSL_EFAULT = 3, /* invalid pointer */
GSL_EINVAL = 4, /* invalid argument supplied by user */
GSL_EFAILED = 5, /* generic failure */
GSL_EFACTOR = 6, /* factorization failed */
GSL_ESANITY = 7, /* sanity check failed - shouldn't happen */
GSL_ENOMEM = 8, /* malloc failed */
GSL_EBADFUNC = 9, /* problem with user-supplied function */
GSL_ERUNAWAY = 10, /* iterative process is out of control */
GSL_EMAXITER = 11, /* exceeded max number of iterations */
GSL_EZERODIV = 12, /* tried to divide by zero */
GSL_EBADTOL = 13, /* user specified an invalid tolerance */
GSL_ETOL = 14, /* failed to reach the specified tolerance */
GSL_EUNDRFLW = 15, /* underflow */
GSL_EOVRFLW = 16, /* overflow */
GSL_ELOSS = 17, /* loss of accuracy */
GSL_EROUND = 18, /* failed because of roundoff error */
GSL_EBADLEN = 19, /* matrix, vector lengths are not conformant */
GSL_ENOTSQR = 20, /* matrix not square */
GSL_ESING = 21, /* apparent singularity detected */
GSL_EDIVERGE = 22, /* integral or series is divergent */
GSL_EUNSUP = 23, /* requested feature is not supported by the hardware */
GSL_EUNIMPL = 24, /* requested feature not (yet) implemented */
GSL_ECACHE = 25, /* cache limit exceeded */
GSL_ETABLE = 26, /* table limit exceeded */
GSL_ENOPROG = 27, /* iteration is not making progress towards solution */
GSL_ENOPROGJ = 28, /* jacobian evaluations are not improving the solution */
GSL_ETOLF = 29, /* cannot reach the specified tolerance in F */
GSL_ETOLX = 30, /* cannot reach the specified tolerance in X */
GSL_ETOLG = 31, /* cannot reach the specified tolerance in gradient */
GSL_EOF = 32 /* end of file */
} ;
/* Report an error: calls the installed error handler, or prints the
 * error and aborts when none is installed. */
void gsl_error (const char * reason, const char * file, int line,
int gsl_errno);
/* Emit one "label" line through the stream handler, or to the current
 * output stream (stderr unless changed) when no handler is installed. */
void gsl_stream_printf (const char *label, const char *file,
int line, const char *reason);
/* Map a status code to a static, human-readable description. */
const char * gsl_strerror (const int gsl_errno);
/* Signatures of user-replaceable error and stream handlers. */
typedef void gsl_error_handler_t (const char * reason, const char * file,
int line, int gsl_errno);
typedef void gsl_stream_handler_t (const char * label, const char * file,
int line, const char * reason);
/* Each setter below installs its argument and returns the previous value. */
gsl_error_handler_t *
gsl_set_error_handler (gsl_error_handler_t * new_handler);
/* Install a handler that ignores all errors. */
gsl_error_handler_t *
gsl_set_error_handler_off (void);
gsl_stream_handler_t *
gsl_set_stream_handler (gsl_stream_handler_t * new_handler);
FILE * gsl_set_stream (FILE * new_stream);
/* GSL_ERROR: call the error handler, and return the error code.
 * The expansion contains a `return`, so this is only usable inside a
 * function whose return type can carry gsl_errno. */
#define GSL_ERROR(reason, gsl_errno) \
do { \
gsl_error (reason, __FILE__, __LINE__, gsl_errno) ; \
return gsl_errno ; \
} while (0)
/* GSL_ERROR_VAL: call the error handler, and return the given value
 * (for functions whose return type is not a status code). */
#define GSL_ERROR_VAL(reason, gsl_errno, value) \
do { \
gsl_error (reason, __FILE__, __LINE__, gsl_errno) ; \
return value ; \
} while (0)
/* GSL_ERROR_VOID: call the error handler, and then return
(for void functions which still need to generate an error) */
#define GSL_ERROR_VOID(reason, gsl_errno) \
do { \
gsl_error (reason, __FILE__, __LINE__, gsl_errno) ; \
return ; \
} while (0)
/* GSL_ERROR_NULL suitable for out-of-memory conditions:
 * reports the error and returns 0 (a null pointer). */
#define GSL_ERROR_NULL(reason, gsl_errno) GSL_ERROR_VAL(reason, gsl_errno, 0)
/* Sometimes you have several status results returned from
* function calls and you want to combine them in some sensible
* way. You cannot produce a "total" status condition, but you can
* pick one from a set of conditions based on an implied hierarchy.
*
* In other words:
* you have: status_a, status_b, ...
* you want: status = (status_a if it is bad, or status_b if it is bad,...)
*
* In this example you consider status_a to be more important and
* it is checked first, followed by the others in the order specified.
*
* Here are some dumb macros to do this.
*/
/* GSL_ERROR_SELECT_n yields the first argument that is not GSL_SUCCESS,
 * or GSL_SUCCESS when all of them are.
 * NOTE: an argument that is not GSL_SUCCESS is evaluated twice, and
 * GSL_STATUS_UPDATE likewise evaluates `s` twice when it is an error,
 * so pass plain variables rather than expressions with side effects. */
#define GSL_ERROR_SELECT_2(a,b) ((a) != GSL_SUCCESS ? (a) : ((b) != GSL_SUCCESS ? (b) : GSL_SUCCESS))
#define GSL_ERROR_SELECT_3(a,b,c) ((a) != GSL_SUCCESS ? (a) : GSL_ERROR_SELECT_2(b,c))
#define GSL_ERROR_SELECT_4(a,b,c,d) ((a) != GSL_SUCCESS ? (a) : GSL_ERROR_SELECT_3(b,c,d))
#define GSL_ERROR_SELECT_5(a,b,c,d,e) ((a) != GSL_SUCCESS ? (a) : GSL_ERROR_SELECT_4(b,c,d,e))
/* Store s into *sp only when s is an error, keeping any earlier status otherwise. */
#define GSL_STATUS_UPDATE(sp, s) do { if ((s) != GSL_SUCCESS) *(sp) = (s);} while(0)
__END_DECLS
#endif /* __GSL_ERRNO_H__ */
axe-0.3.1/src/gsl/gsl_message.h 0000664 0000000 0000000 00000004514 12635670016 0016320 0 ustar 00root root 0000000 0000000 /* err/gsl_message.h
*
* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2007 Gerard Jungman, Brian Gough
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __GSL_MESSAGE_H__
#define __GSL_MESSAGE_H__
#undef __BEGIN_DECLS
#undef __END_DECLS
#ifdef __cplusplus
# define __BEGIN_DECLS extern "C" {
# define __END_DECLS }
#else
# define __BEGIN_DECLS /* empty */
# define __END_DECLS /* empty */
#endif
__BEGIN_DECLS
/* Provide a general messaging service for client use. Messages can
* be selectively turned off at compile time by defining an
* appropriate message mask. Client code which uses the GSL_MESSAGE()
* macro must provide a mask which is or'ed with the GSL_MESSAGE_MASK.
*
* The messaging service can be completely turned off
* by defining GSL_MESSAGING_OFF. */
/* Emit `message`, tagged with the given source location, when `mask`
 * shares at least one bit with the run-time gsl_message_mask. */
void gsl_message(const char * message, const char * file, int line,
unsigned int mask);
#ifndef GSL_MESSAGE_MASK
#define GSL_MESSAGE_MASK 0xffffffffu /* default all messages allowed */
#endif
/* Run-time message mask.  The single definition (initialised to
 * GSL_MESSAGE_MASK) lives in message.c; it is declared `extern` here so
 * that including this header from several translation units does not
 * create one tentative definition per unit, which fails to link with
 * compilers that default to -fno-common. */
extern unsigned int gsl_message_mask ;
/* Provide some symbolic masks for client ease of use.
 * Each constant is a distinct single bit, so masks can be OR-ed together. */
enum {
GSL_MESSAGE_MASK_A = 1,
GSL_MESSAGE_MASK_B = 2,
GSL_MESSAGE_MASK_C = 4,
GSL_MESSAGE_MASK_D = 8,
GSL_MESSAGE_MASK_E = 16,
GSL_MESSAGE_MASK_F = 32,
GSL_MESSAGE_MASK_G = 64,
GSL_MESSAGE_MASK_H = 128
} ;
/* GSL_MESSAGE(message, mask): emit `message` through gsl_message() when
 * `mask` shares a bit with the compile-time GSL_MESSAGE_MASK.  Compiles
 * to nothing when GSL_MESSAGING_OFF is defined.
 * The arguments are parenthesised so that a compound mask such as
 * `GSL_MESSAGE_MASK_A | GSL_MESSAGE_MASK_B` is tested as a whole instead
 * of being split by the higher precedence of `&`. */
#ifdef GSL_MESSAGING_OFF /* throw away messages */
#define GSL_MESSAGE(message, mask) do { } while(0)
#else /* output all messages */
#define GSL_MESSAGE(message, mask) \
do { \
  if ((mask) & GSL_MESSAGE_MASK) \
    gsl_message ((message), __FILE__, __LINE__, (mask)) ; \
} while (0)
#endif
__END_DECLS
#endif /* __GSL_MESSAGE_H__ */
axe-0.3.1/src/gsl/init.c 0000664 0000000 0000000 00000005336 12635670016 0014770 0 ustar 00root root 0000000 0000000 /* combination/init.c
* based on permutation/init.c by Brian Gough
*
* Copyright (C) 2001 Szymon Jaroszewicz
* Copyright (C) 2009 Brian Gough
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include
#include "gsl_errno.h"
#include "gsl_combination.h"
/* Allocate a combination of k elements out of n.
 *
 * n must be positive and k must not exceed n.  The elements are left
 * uninitialised; when k is 0 no element storage is allocated and
 * c->data is set to 0.
 *
 * Returns the new combination, or 0 after invoking the GSL error
 * handler (GSL_EDOM for bad arguments, GSL_ENOMEM when the storage
 * cannot be obtained).
 */
gsl_combination *
gsl_combination_alloc (const size_t n, const size_t k)
{
  gsl_combination * c;

  if (n == 0)
    {
      GSL_ERROR_VAL ("combination parameter n must be positive integer",
                     GSL_EDOM, 0);
    }
  if (k > n)
    {
      GSL_ERROR_VAL ("combination length k must be an integer less than or equal to n",
                     GSL_EDOM, 0);
    }
  /* k * sizeof (size_t) must not wrap around: a wrapped product would
   * make malloc return a buffer far smaller than the k elements that
   * callers go on to write. */
  if (k > ((size_t) -1) / sizeof (size_t))
    {
      GSL_ERROR_VAL ("failed to allocate space for combination data",
                     GSL_ENOMEM, 0);
    }

  c = (gsl_combination *) malloc (sizeof (gsl_combination));
  if (c == 0)
    {
      GSL_ERROR_VAL ("failed to allocate space for combination struct",
                     GSL_ENOMEM, 0);
    }

  if (k > 0)
    {
      c->data = (size_t *) malloc (k * sizeof (size_t));
      if (c->data == 0)
        {
          free (c); /* exception in constructor, avoid memory leak */
          GSL_ERROR_VAL ("failed to allocate space for combination data",
                         GSL_ENOMEM, 0);
        }
    }
  else
    {
      c->data = 0;
    }

  c->n = n;
  c->k = k;
  return c;
}
/* Allocate a combination and set it to the first one, {0, 1, ..., k-1}.
 * Returns 0 when the underlying allocation fails. */
gsl_combination *
gsl_combination_calloc (const size_t n, const size_t k)
{
  gsl_combination * comb = gsl_combination_alloc (n, k);

  if (comb != 0)
    {
      /* comb->k equals k here, so this writes data[i] = i for i < k. */
      gsl_combination_init_first (comb);
    }

  return comb;
}
/* Reset c to the lexicographically first combination: data[i] = i. */
void
gsl_combination_init_first (gsl_combination * c)
{
  const size_t count = c->k;
  size_t pos = 0;

  while (pos < count)
    {
      c->data[pos] = pos;
      pos++;
    }
}
/* Reset c to the lexicographically last combination:
 * data[i] = n - k + i, i.e. the k largest indices below n. */
void
gsl_combination_init_last (gsl_combination * c)
{
  const size_t count = c->k;
  const size_t base = c->n - count;
  size_t pos;

  for (pos = 0; pos < count; pos++)
    c->data[pos] = base + pos;
}
/* Release a combination and its element storage.  NULL is a no-op. */
void
gsl_combination_free (gsl_combination * c)
{
  if (c != NULL)
    {
      /* Element storage is only released when k > 0. */
      if (c->k > 0)
        free (c->data);
      free (c);
    }
}
axe-0.3.1/src/gsl/message.c 0000664 0000000 0000000 00000002246 12635670016 0015446 0 ustar 00root root 0000000 0000000 /* err/message.c
*
* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2007 Gerard Jungman, Brian Gough
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include
#include
#include
#include "gsl_errno.h"
#include "gsl_message.h"
/* Run-time message mask; starts out equal to the compile-time default. */
unsigned int gsl_message_mask = GSL_MESSAGE_MASK;

/* Print `reason` with the label "MESSAGE" when `mask` shares at least
 * one bit with gsl_message_mask; otherwise do nothing. */
void
gsl_message (const char * reason, const char * file, int line,
             unsigned int mask)
{
  if ((mask & gsl_message_mask) == 0)
    return;

  gsl_stream_printf ("MESSAGE", file, line, reason);
}
axe-0.3.1/src/gsl/stream.c 0000664 0000000 0000000 00000003416 12635670016 0015315 0 ustar 00root root 0000000 0000000 /* err/stream.c
*
* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2007 Gerard Jungman, Brian Gough
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include
#include
#include
#include "gsl_errno.h"
#include "gsl_message.h"
/* Output stream for library messages; NULL until first use, at which
 * point it is set to stderr. */
FILE * gsl_stream = NULL ;
/* Optional user callback that replaces the default fprintf output. */
gsl_stream_handler_t * gsl_stream_handler = NULL;
/* Emit one labelled message.
 *
 * The output stream is set to stderr first if it has never been set.
 * An installed stream handler receives the message; without one, a line
 * of the form "gsl: FILE:LINE: LABEL: REASON" is written to the stream.
 */
void
gsl_stream_printf (const char *label, const char *file, int line,
                   const char *reason)
{
  if (!gsl_stream)
    gsl_stream = stderr;

  if (gsl_stream_handler == NULL)
    {
      fprintf (gsl_stream, "gsl: %s:%d: %s: %s\n", file, line, label, reason);
      return;
    }

  (*gsl_stream_handler) (label, file, line, reason);
}
/* Install new_handler as the stream handler and return the one it
 * replaces (NULL if none was installed). */
gsl_stream_handler_t *
gsl_set_stream_handler (gsl_stream_handler_t * new_handler)
{
  gsl_stream_handler_t * const replaced = gsl_stream_handler;

  gsl_stream_handler = new_handler;

  return replaced;
}
/* Redirect library messages to new_stream.  Returns the stream that was
 * in use before the call; stderr is reported if none had been set. */
FILE *
gsl_set_stream (FILE * new_stream)
{
  FILE * const replaced = (gsl_stream != NULL) ? gsl_stream : stderr;

  gsl_stream = new_stream;

  return replaced;
}
axe-0.3.1/src/gsl/strerror.c 0000664 0000000 0000000 00000006456 12635670016 0015713 0 ustar 00root root 0000000 0000000 /* err/strerror.c
*
* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2007 Gerard Jungman, Brian Gough
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "gsl_errno.h"
/* Translate a GSL status code into a static description string.
 * Codes that are not part of the status enumeration yield
 * "unknown error code". */
const char *
gsl_strerror (const int gsl_errno)
{
  const char *text = "unknown error code" ;

  switch (gsl_errno)
    {
    case GSL_SUCCESS:
      text = "success" ;
      break;
    case GSL_FAILURE:
      text = "failure" ;
      break;
    case GSL_CONTINUE:
      text = "the iteration has not converged yet";
      break;
    case GSL_EDOM:
      text = "input domain error" ;
      break;
    case GSL_ERANGE:
      text = "output range error" ;
      break;
    case GSL_EFAULT:
      text = "invalid pointer" ;
      break;
    case GSL_EINVAL:
      text = "invalid argument supplied by user" ;
      break;
    case GSL_EFAILED:
      text = "generic failure" ;
      break;
    case GSL_EFACTOR:
      text = "factorization failed" ;
      break;
    case GSL_ESANITY:
      text = "sanity check failed - shouldn't happen" ;
      break;
    case GSL_ENOMEM:
      text = "malloc failed" ;
      break;
    case GSL_EBADFUNC:
      text = "problem with user-supplied function";
      break;
    case GSL_ERUNAWAY:
      text = "iterative process is out of control";
      break;
    case GSL_EMAXITER:
      text = "exceeded max number of iterations" ;
      break;
    case GSL_EZERODIV:
      text = "tried to divide by zero" ;
      break;
    case GSL_EBADTOL:
      text = "specified tolerance is invalid or theoretically unattainable" ;
      break;
    case GSL_ETOL:
      text = "failed to reach the specified tolerance" ;
      break;
    case GSL_EUNDRFLW:
      text = "underflow" ;
      break;
    case GSL_EOVRFLW:
      text = "overflow" ;
      break;
    case GSL_ELOSS:
      text = "loss of accuracy" ;
      break;
    case GSL_EROUND:
      text = "roundoff error" ;
      break;
    case GSL_EBADLEN:
      text = "matrix/vector sizes are not conformant" ;
      break;
    case GSL_ENOTSQR:
      text = "matrix not square" ;
      break;
    case GSL_ESING:
      text = "singularity or extremely bad function behavior detected" ;
      break;
    case GSL_EDIVERGE:
      text = "integral or series is divergent" ;
      break;
    case GSL_EUNSUP:
      text = "the required feature is not supported by this hardware platform";
      break;
    case GSL_EUNIMPL:
      text = "the requested feature is not (yet) implemented";
      break;
    case GSL_ECACHE:
      text = "cache limit exceeded";
      break;
    case GSL_ETABLE:
      text = "table limit exceeded";
      break;
    case GSL_ENOPROG:
      text = "iteration is not making progress towards solution";
      break;
    case GSL_ENOPROGJ:
      text = "jacobian evaluations are not improving the solution";
      break;
    case GSL_ETOLF:
      text = "cannot reach the specified tolerance in F";
      break;
    case GSL_ETOLX:
      text = "cannot reach the specified tolerance in X";
      break;
    case GSL_ETOLG:
      text = "cannot reach the specified tolerance in gradient";
      break;
    case GSL_EOF:
      text = "end of file";
      break;
    default:
      /* keep the "unknown error code" default */
      break;
    }

  return text;
}
axe-0.3.1/src/libqes/ 0000775 0000000 0000000 00000000000 12635670016 0014344 5 ustar 00root root 0000000 0000000 axe-0.3.1/src/main.c 0000664 0000000 0000000 00000035244 12635670016 0014165 0 ustar 00root root 0000000 0000000 /*
* ============================================================================
*
* Filename: axe_main.c
* Description: Main loop for axe
* Copyright: 2014-2015 Kevin Murray
* License: GNU GPL v3+
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
* ============================================================================
*/
#include "axe.h"
#include
/* Write the program name and version string to stderr. */
static void
print_version(void)
{
    FILE *out = stderr;

    fprintf(out, "AXE Version %s\n", AXE_VERSION);
}
/* Write the extended help text (option rules and the two barcode file
 * layouts) to stderr. */
static void
print_help(void)
{
    /* One string literal, assembled from adjacent pieces, emitted in a
     * single call; the text contains no conversion specifiers. */
    static const char help_text[] =
        "All mandatory short options are mandatory in their\n"
        "long option form. Likewise, all short options that take\n"
        "an argument must be given an argument in their long form\n"
        "\n"
        "If a forward read input is given, a forward read output\n"
        "must be. Likewise for a reverse/interleaved input. If either\n"
        "forward and/or reverse reads are given, interleaved input\n"
        "cannot be. However, one can input interleaved paired reads\n"
        "and output separate forwards and reverse reads, and vice versa.\n"
        "\n"
        "The barcode file is a tab-separated tabular file with an\n"
        "optional header, and has two alternative formats. The standard\n"
        "form (see below) is expected unless --combinatorial is given.\n"
        "\n"
        "The standard format is:\n"
        "Barcode\tID\n"
        "ACTA\tA1\n"
        "CCTC\tA2\n"
        "...\n"
        "\n"
        "The combinatorial format is:\n"
        "Barcode1\tBarcode2\tID\n"
        "ACTA\tACGC\tA1\n"
        "CCTC\tTCTA\tA2\n"
        "...\n"
        "\n";

    fputs(help_text, stderr);
}
/* Write the version banner followed by the usage synopsis and the
 * option summary to stderr.
 *
 * The synopsis lists `-V` for the version form, matching the option
 * table below it (`-v` is the verbosity flag).
 */
static void
print_usage(void)
{
    print_version();
    fprintf(stderr, "\nUSAGE:\n");
    fprintf(stderr, "axe-demux [-mzc2pt] -b (-f [-r] | -i) (-F [-R] | -I)\n");
    fprintf(stderr, "axe-demux -h\n");
    fprintf(stderr, "axe-demux -V\n\n");
    fprintf(stderr, "OPTIONS:\n");
    fprintf(stderr, " -m, --mismatch\tMaximum hamming distance mismatch. [int, default 1]\n");
    fprintf(stderr, " -z, --ziplevel\tGzip compression level, or 0 for plain text [int, default 0]\n");
    fprintf(stderr, " -c, --combinatorial\tUse combinatorial barcode matching. [flag, default OFF]\n");
    fprintf(stderr, " -p, --permissive\tDon't error on barcode mismatch conflict, matching only\n");
    fprintf(stderr, " \texactly for conflicting barcodes. [flag, default OFF]\n");
    fprintf(stderr, " -2, --trim-r2\tTrim barcode from R2 read as well as R1. [flag, default OFF]\n");
    fprintf(stderr, " -b, --barcodes\tBarcode file. See --help for example. [file]\n");
    fprintf(stderr, " -f, --fwd-in\tInput forward read. [file]\n");
    fprintf(stderr, " -F, --fwd-out\tOutput forward read prefix. [file]\n");
    fprintf(stderr, " -r, --rev-in\tInput reverse read. [file]\n");
    fprintf(stderr, " -R, --rev-out\tOutput reverse read prefix. [file]\n");
    fprintf(stderr, " -i, --ilfq-in\tInput interleaved paired reads. [file]\n");
    fprintf(stderr, " -I, --ilfq-out\tOutput interleaved paired reads prefix. [file]\n");
    fprintf(stderr, " -t, --table-file\tOutput a summary table of demultiplexing statistics to file. [file]\n");
    fprintf(stderr, " -h, --help\t\tPrint this usage plus additional help.\n");
    fprintf(stderr, " -V, --version\tPrint version string.\n");
    fprintf(stderr, " -v, --verbose\tBe more verbose. Additive, -vv is more verbose than -v.\n");
    fprintf(stderr, " -q, --quiet\t\tBe very quiet.\n");
    fprintf(stderr, "\n");
}
/* Short option string for getopt_long(); a trailing ':' marks an option
 * that takes an argument. */
static const char *axe_opts = "m:z:c2pb:f:F:r:R:i:I:t:hVvqd";

/* Long option table; every entry maps onto the short option of the same
 * meaning.
 *
 * "mismatch" takes a required argument, like its short form `-m`: with
 * optional_argument, `--mismatch 2` would leave optarg NULL.
 * "quiet" is listed so the documented `--quiet` spelling is accepted.
 */
static const struct option axe_longopts[] = {
    { "mismatch",      required_argument, NULL, 'm' },
    { "ziplevel",      required_argument, NULL, 'z' },
    { "combinatorial", no_argument,       NULL, 'c' },
    { "trim-r2",       no_argument,       NULL, '2' },
    { "permissive",    no_argument,       NULL, 'p' },
    { "barcodes",      required_argument, NULL, 'b' },
    { "fwd-in",        required_argument, NULL, 'f' },
    { "fwd-out",       required_argument, NULL, 'F' },
    { "rev-in",        required_argument, NULL, 'r' },
    { "rev-out",       required_argument, NULL, 'R' },
    { "ilfq-in",       required_argument, NULL, 'i' },
    { "ilfq-out",      required_argument, NULL, 'I' },
    { "table-file",    required_argument, NULL, 't' },
    { "help",          no_argument,       NULL, 'h' },
    { "version",       no_argument,       NULL, 'V' },
    { "verbose",       no_argument,       NULL, 'v' },
    { "quiet",         no_argument,       NULL, 'q' },
    { "debug",         no_argument,       NULL, 'd' },
    { NULL,            0,                 NULL, 0 }
};
static int
parse_args(struct axe_config *config, int argc, char * const *argv)
{
int c = 0;
int optind = 0;
if (argc < 2 ) {
return 1;
}
if (!axe_config_ok(config) || argc < 1 || argv == NULL) {
goto error;
}
/* Set some sane defaults */
/* Most things will default to 0 as we `calloc` the config struct, so we
* don't need to explicity set them. */
config->mismatches = 1;
config->verbosity = 0;
config->out_compress_level = 0;
/* Parse argv using getopt */
while ((c = getopt_long(argc, argv, axe_opts, axe_longopts, &optind)) > 0){
switch (c) {
case 'm':
config->mismatches = atol(optarg);
break;
case 'z':
config->out_compress_level = atoi(optarg);
break;
case 'c':
config->match_combo |= 1;
break;
case 'p':
config->permissive |= 1;
break;
case '2':
config->trim_rev |= 1;
break;
case 'b':
config->barcode_file = strdup(optarg);
break;
case 'f':
if (config->in_mode == READS_INTERLEAVED) {
goto error;
break;
}
config->infiles[0] = strdup(optarg);
if (config->in_mode == READS_UNKNOWN) {
config->in_mode = READS_SINGLE;
}
break;
case 'F':
config->out_prefixes[0] = strdup(optarg);
config->out_mode = READS_SINGLE;
break;
case 'r':
if (config->in_mode == READS_INTERLEAVED) {
goto error;
break;
}
config->infiles[1] = strdup(optarg);
config->in_mode = READS_PAIRED;
break;
case 'R':
config->out_prefixes[1] = strdup(optarg);
config->out_mode = READS_PAIRED;
break;
case 'i':
config->infiles[0] = strdup(optarg);
config->in_mode = READS_INTERLEAVED;
break;
case 'I':
config->out_prefixes[0] = strdup(optarg);
config->out_mode = READS_INTERLEAVED;
break;
case 't':
config->table_file = strdup(optarg);
break;
case 'h':
goto help;
case 'V':
goto version;
case 'v':
config->verbosity += 1;
break;
case 'q':
config->verbosity -= 1;
break;
case 'd':
config->debug = 1;
break;
case '?':
default:
/* Getopt long prints its own error msg */
goto error;
}
}
/* Check options are sane */
if (config->barcode_file == NULL) {
fprintf(stderr, "ERROR: Barcode file must be provided\n");
goto error;
}
if (config->mismatches > 4) {
fprintf(stderr, "ERROR: Silly mismatch level %zu\n",
config->mismatches);
goto error;
}
if (config->in_mode == READS_UNKNOWN) {
fprintf(stderr, "ERROR: Input file(s) must be provided\n");
goto error;
}
if (config->infiles[0] == NULL) {
switch (config->in_mode) {
case READS_SINGLE:
fprintf(stderr, "ERROR: Setting forward read input file failed.\n");
break;
case READS_PAIRED:
fprintf(stderr, "ERROR: Forward read file must be provided.\n");
break;
case READS_INTERLEAVED:
fprintf(stderr, "ERROR: Setting interleaved input file failed.\n");
break;
case READS_UNKNOWN:
default:
break;
}
goto error;
}
if (config->infiles[1] == NULL) {
switch (config->in_mode) {
case READS_SINGLE:
case READS_INTERLEAVED:
/* Not an error */
break;
case READS_PAIRED:
fprintf(stderr, "ERROR: Setting revese read input file failed.\n");
goto error;
break;
case READS_UNKNOWN:
default:
goto error;
break;
}
}
if (config->infiles[1] != NULL) {
switch (config->in_mode) {
case READS_PAIRED:
/* Not an error */
break;
case READS_INTERLEAVED:
fprintf(stderr, "ERROR: Revese read input file set in interleaved mode.\n");
goto error;
break;
case READS_SINGLE:
fprintf(stderr, "ERROR: Revese read input file set in single-end mode.\n");
goto error;
break;
case READS_UNKNOWN:
default:
/* Misc weirdness */
goto error;
break;
}
}
if (config->out_prefixes[0] == NULL) {
switch (config->out_mode) {
case READS_SINGLE:
fprintf(stderr, "ERROR: Setting forward read output prefix failed.\n");
break;
case READS_PAIRED:
fprintf(stderr, "ERROR: Forward read prefix must be provided.\n");
break;
case READS_INTERLEAVED:
fprintf(stderr, "ERROR: Setting interleaved output prefix failed.\n");
break;
case READS_UNKNOWN:
default:
break;
}
goto error;
}
if (config->out_prefixes[1] == NULL) {
switch (config->out_mode) {
case READS_SINGLE:
case READS_INTERLEAVED:
/* Not an error */
break;
case READS_PAIRED:
fprintf(stderr, "ERROR: Setting revese read output prefix failed.\n");
goto error;
break;
case READS_UNKNOWN:
default:
goto error;
break;
}
}
if (config->out_prefixes[1] != NULL) {
switch (config->out_mode) {
case READS_PAIRED:
/* Not an error */
break;
case READS_INTERLEAVED:
fprintf(stderr, "ERROR: Revese read output prefix set in interleaved mode.\n");
goto error;
break;
case READS_SINGLE:
case READS_UNKNOWN:
default:
/* Misc weirdness */
goto error;
break;
}
}
config->have_cli_opts = 1;
format_call_number = 0;
qes_logger_init(config->logger, "[axe] ", QES_LOG_DEBUG);
qes_logger_add_destination_formatted(config->logger, stderr, QES_LOG_DEBUG,
&axe_formatter);
return 0;
error:
fprintf(stderr,
"Axe failed due to bad CLI flags. Consult the usage below please!\n\n");
config->have_cli_opts = 0;
return 1;
help:
config->have_cli_opts = 0;
return 2;
version:
print_version();
axe_config_destroy(config);
exit(0);
}
/* Program entry point.
 *
 * Creates the configuration, parses the command line, then runs the
 * demultiplexing stages in order.  The first stage that returns a
 * non-zero status stops the run; that status becomes the exit code.
 * The configuration is destroyed on every path.
 */
int
main (int argc, char * const *argv)
{
    int ret = 0;
    struct axe_config *config = axe_config_create();

    if (config == NULL) {
        ret = EXIT_FAILURE;
        goto end;
    }

    if ((ret = parse_args(config, argc, argv)) != 0) {
        print_usage();
        /* A status of 2 means help was requested explicitly. */
        if (ret == 2) {
            print_help();
        }
        goto end;
    }

    if ((ret = axe_read_barcodes(config)) != 0) {
        fprintf(stderr, "[main] ERROR: axe_read_barcodes returned %i\n", ret);
        fprintf(stderr, "\tThis indicates that the barcode file is invalid.\n");
        fprintf(stderr, "\tPlease check that it conforms to the layout described in the help message\n");
        goto end;
    }

    if ((ret = axe_setup_barcode_lookup(config)) != 0) {
        fprintf(stderr, "[main] ERROR: axe_setup_barcode_lookup returned %i\n",
                ret);
        goto end;
    }

    if ((ret = axe_make_tries(config)) != 0) {
        fprintf(stderr, "[main] ERROR: axe_make_tries returned %i\n", ret);
        goto end;
    }

    if ((ret = axe_load_tries(config)) != 0) {
        fprintf(stderr, "[main] ERROR: axe_load_tries returned %i\n", ret);
        goto end;
    }

    if ((ret = axe_make_outputs(config)) != 0) {
        fprintf(stderr, "[main] ERROR: axe_make_outputs returned %i\n", ret);
        goto end;
    }

    if ((ret = axe_process_file(config)) != 0) {
        fprintf(stderr, "[main] ERROR: axe_process_file returned %i\n", ret);
        goto end;
    }

    if ((ret = axe_print_summary(config)) != 0) {
        fprintf(stderr, "[main] ERROR: axe_print_summary returned %i\n", ret);
        goto end;
    }

    if ((ret = axe_write_table(config)) != 0) {
        fprintf(stderr, "[main] ERROR: axe_write_table returned %i\n", ret);
        goto end;
    }

end:
    axe_config_destroy(config);
    return ret;
}
axe-0.3.1/tests/ 0000775 0000000 0000000 00000000000 12635670016 0013440 5 ustar 00root root 0000000 0000000 axe-0.3.1/tests/CMakeLists.txt 0000664 0000000 0000000 00000007160 12635670016 0016204 0 ustar 00root root 0000000 0000000 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/tinytest)
# Unit-test driver: tinytest harness plus the libaxe test cases.
add_executable(test_axe
    test.c
    ${CMAKE_CURRENT_SOURCE_DIR}/tinytest/tinytest.c
    test_libaxe.c)
target_link_libraries(test_axe
    ${AXE_DEPENDS_LIBRARIES}
    axelib)

# Stage everything the tests need inside the build tree on every build:
#   <build>/data  - copy of the test data directory
#   <build>/bin   - receives axe_cli_tests.py
#   <build>/out   - output directory
add_custom_target(setup_tests ALL
    COMMAND ${CMAKE_COMMAND} -E copy_directory
            ${CMAKE_CURRENT_SOURCE_DIR}/data
            ${CMAKE_BINARY_DIR}/data
    COMMAND ${CMAKE_COMMAND} -E copy
            ${CMAKE_CURRENT_SOURCE_DIR}/axe_cli_tests.py
            ${CMAKE_BINARY_DIR}/bin
    COMMAND ${CMAKE_COMMAND} -E make_directory
            ${CMAKE_BINARY_DIR}/out)
# Make sure the staging step has run before test_axe is built.
add_dependencies(test_axe setup_tests)

# CTest: run the compiled unit tests.
add_test(NAME "UnitTests"
         COMMAND test_axe)

# Command run by, and output location of, the optional coverage report.
set(COVERAGE_CMD test_axe)
set(COVERAGE_OUT "${CMAKE_BINARY_DIR}/coverage_html")

# CTest: run the command-line tests; the script takes the build directory as
# its argument and is launched through whatever `python` resolves to.
add_test(NAME "IntegrationTests"
         COMMAND python
                 ${CMAKE_BINARY_DIR}/bin/axe_cli_tests.py
                 ${CMAKE_BINARY_DIR})
# Optional "Coverage" build type.
#
# Configuring with -DCMAKE_BUILD_TYPE=Coverage defines the compiler flags for
# that build type (gcov instrumentation on top of the Debug flags) and adds a
# `coverage` target which:
#   1. zeroes the lcov counters,
#   2. runs ${COVERAGE_CMD},
#   3. captures the counters into ${COVERAGE_OUT}.info and strips bundled
#      third-party sources from it,
#   4. renders an HTML report into ${COVERAGE_OUT} with genhtml.
# gcov, lcov and genhtml must be installed; configuration aborts otherwise.
if(CMAKE_BUILD_TYPE STREQUAL "Coverage")
    find_program(GCOV_PATH gcov)
    find_program(LCOV_PATH lcov)
    find_program(GENHTML_PATH genhtml)
    find_program(GCOVR_PATH gcovr PATHS ${CMAKE_SOURCE_DIR}/tests)

    set(CMAKE_C_FLAGS_COVERAGE
        "${CMAKE_C_FLAGS_DEBUG} -g -O0 --coverage -fprofile-arcs -ftest-coverage"
        CACHE STRING "Flags used by the C compiler during coverage builds."
        FORCE)
    set(CMAKE_EXE_LINKER_FLAGS_COVERAGE
        ""
        CACHE STRING "Flags used for linking binaries during coverage builds."
        FORCE)
    set(CMAKE_SHARED_LINKER_FLAGS_COVERAGE
        ""
        CACHE STRING "Flags used by the shared libraries linker during coverage builds."
        FORCE)
    mark_as_advanced(
        CMAKE_C_FLAGS_COVERAGE
        CMAKE_EXE_LINKER_FLAGS_COVERAGE
        CMAKE_SHARED_LINKER_FLAGS_COVERAGE)

    if(NOT GCOV_PATH)
        message(FATAL_ERROR "gcov not found! Aborting...")
    endif()

    if(NOT LCOV_PATH)
        message(FATAL_ERROR "lcov not found! Aborting...")
    endif()

    if(NOT GENHTML_PATH)
        message(FATAL_ERROR "genhtml not found! Aborting...")
    endif()

    if(NOT CMAKE_COMPILER_IS_GNUCC AND NOT CMAKE_COMPILER_IS_GNUCXX)
        # Clang version 3.0.0 and greater now supports gcov as well.
        message(WARNING "Compiler is not GNU gcc! Clang Version 3.0.0 and greater supports gcov as well, but older versions don't.")

        if(NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" AND NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
            message(FATAL_ERROR "Compiler is not GNU gcc! Aborting...")
        endif()
    endif()

    # Setup target
    add_custom_target(coverage
        # Cleanup lcov
        COMMAND ${LCOV_PATH} --rc lcov_branch_coverage=1 --directory ${CMAKE_BINARY_DIR}/src --zerocounters

        # Run tests
        COMMAND ${COVERAGE_CMD}

        # Capturing lcov counters and generating report
        COMMAND ${LCOV_PATH} --rc lcov_branch_coverage=1 --directory ${CMAKE_BINARY_DIR}/src --capture --output-file ${COVERAGE_OUT}.info
        # lcov matches --remove patterns against the complete file names
        # recorded in the tracefile, so each pattern starts with "*/" to match
        # whatever directory the source tree lives in.  VERBATIM (below) keeps
        # the shell from expanding the wildcards before lcov sees them.
        # NOTE(review): .gitmodules lists the submodule as src/libqes, so the
        # kmlib pattern is presumably stale - confirm and update if libqes
        # should be excluded from the report instead.
        COMMAND ${LCOV_PATH} --rc lcov_branch_coverage=1 --output-file ${COVERAGE_OUT}.info --remove ${COVERAGE_OUT}.info "*/src/kmlib/*"
        COMMAND ${LCOV_PATH} --rc lcov_branch_coverage=1 --output-file ${COVERAGE_OUT}.info --remove ${COVERAGE_OUT}.info "*/src/datrie/*"
        COMMAND ${GENHTML_PATH} --branch-coverage -o ${COVERAGE_OUT} ${COVERAGE_OUT}.info
        COMMAND ${CMAKE_COMMAND} -E remove ${COVERAGE_OUT}.info

        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
        COMMENT "Resetting code coverage counters to zero.\nProcessing code coverage counters and generating report."
        VERBATIM
    )

    # Show info where to find the report
    add_custom_command(TARGET coverage POST_BUILD
        COMMAND ;
        COMMENT "Open ${COVERAGE_OUT}/index.html in your browser to view the coverage report."
    )
endif() # build type coverage
axe-0.3.1/tests/axe_cli_tests.py 0000775 0000000 0000000 00000016523 12635670016 0016652 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
from __future__ import print_function
import hashlib
import logging
import os
from os import path
import re
import shutil
import subprocess as sp
import sys
import unittest
# The first command-line argument is the CMake build directory.  It is removed
# from sys.argv once read (presumably so that unittest.main() does not try to
# interpret it as a test name).
if len(sys.argv) < 2:
    print("USAGE: axe_cli_tests.py $CMAKE_BINARY_DIR")
    # sys.exit() rather than the bare exit() helper, which is only injected by
    # the `site` module for interactive use.
    sys.exit(-1)
CMAKE_BINARY_DIR = sys.argv.pop(1)
def md5sum(filename):
    """Return the hexadecimal MD5 digest of the file at ``filename``.

    The file is opened in binary mode and consumed in 1 MiB pieces, so it is
    never loaded into memory as a whole.
    """
    piece_size = 1024 ** 2
    digest = hashlib.md5()
    with open(filename, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for piece in iter(lambda: stream.read(piece_size), b''):
            digest.update(piece)
    return digest.hexdigest()
class AxeTest(unittest.TestCase):
    """Shared fixture for the axe-demux command-line tests.

    Attributes set by ``__init__`` (all below ``CMAKE_BINARY_DIR``):
      data -- directory holding the test input files
      out  -- scratch output directory, created by setUp() and deleted by
              tearDown() around every test
      axe  -- path of the axe-demux executable under test
      log  -- the "AxeTest" logger
    """

    # Always show complete diffs when a dictionary comparison fails.
    maxDiff = None

    def __init__(self, methodName='runTest'):
        super(AxeTest, self).__init__(methodName)
        self.data = path.join(CMAKE_BINARY_DIR, "data")
        self.out = path.join(CMAKE_BINARY_DIR, "out", "cli_tests")
        self.axe = path.join(CMAKE_BINARY_DIR, "bin", "axe-demux")
        self.log = logging.getLogger("AxeTest")
        if not path.exists(self.data) or not path.exists(self.axe):
            print("Please run axe_cli_tests.py after compiling axe")
            # sys.exit() instead of the interactive-only exit() helper.
            sys.exit(-1)

    def setUp(self):
        """Create the scratch output directory if it does not exist yet."""
        if not path.exists(self.out):
            os.makedirs(self.out)

    def run_and_check_stdout(self, command):
        """Run ``command`` (an argv list) and report whether it succeeded.

        Returns True when the process exits with status 0.  On a non-zero
        exit status the captured stdout+stderr is logged at INFO level and
        False is returned.
        """
        self.log.debug(" ".join(command))
        try:
            # Only the exit status matters here; the captured output is
            # needed solely for the failure log below.
            sp.check_output(command, stderr=sp.STDOUT)
        except sp.CalledProcessError as err:
            self.log.info(err.output)
            return False
        return True

    def get_md5_dict(self):
        """Map every file name found under ``self.out`` to its MD5 hex digest.

        Keys are bare file names, so files with the same name in different
        sub-directories overwrite each other.
        """
        dct = {}
        for root, _dirs, files in os.walk(self.out):
            for fle in files:
                # os.walk() already yields bare file names.
                dct[fle] = md5sum(path.join(root, fle))
        return dct

    def tearDown(self):
        """Delete the scratch output directory and everything inside it."""
        if path.exists(self.out):
            shutil.rmtree(self.out)
class TestBadUsage(AxeTest):
    """An invalid command line must fail and must not write any output."""

    def test_bad_command(self):
        # "-f" is passed without the file name that follows it in every
        # other test.
        bad_invocation = [self.axe, "-f"]
        succeeded = self.run_and_check_stdout(bad_invocation)
        self.assertFalse(succeeded)
        self.assertDictEqual({}, self.get_md5_dict())
class TestPareSE(AxeTest):
    """Single-end runs on the ``pare`` data set, plain and with ``-z 9``.

    Each test runs axe-demux and compares the MD5 digest of every file left
    in the output directory with a fixed expected value.
    """

    def __init__(self, methodName='runTest'):
        super(TestPareSE, self).__init__(methodName)
        self.infq = path.join(self.data, "pare.fq.gz")
        self.barcodes = path.join(self.data, "pare.barcodes")
        self.outfq = path.join(self.out, "pare_se")
        self.nobcdfq = path.join(self.out, "pare_se_unknown_R1.fastq")

    def _run_axe(self, *extra_args):
        """Run axe-demux on the pare input and require a zero exit status."""
        argv = [
            self.axe,
            "-f", self.infq,
            "-F", self.outfq,
            '-b', self.barcodes,
        ]
        argv.extend(extra_args)
        self.assertTrue(self.run_and_check_stdout(argv))

    def test_pare_se(self):
        self._run_axe()
        expected = {
            'pare_se_1_R1.fastq': 'd41d8cd98f00b204e9800998ecf8427e',
            'pare_se_2_R1.fastq': 'd41d8cd98f00b204e9800998ecf8427e',
            'pare_se_3_R1.fastq': 'd41d8cd98f00b204e9800998ecf8427e',
            'pare_se_4_R1.fastq': '8e5eef3323e597b209f79dc9fcd74c9a',
            'pare_se_5_R1.fastq': 'd41d8cd98f00b204e9800998ecf8427e',
            'pare_se_6_R1.fastq': '7228a165f353920328360dedc3a41205',
            'pare_se_7_R1.fastq': 'd41d8cd98f00b204e9800998ecf8427e',
            'pare_se_8_R1.fastq': 'b349d3276ba7c7515d0093b1a49b3959',
            'pare_se_9_R1.fastq': '74b4763271aefcc135425b06730874ba',
            'pare_se_unknown_R1.fastq': 'd450569dd8fd4bdddffbfaeec4980273',
        }
        self.assertDictEqual(expected, self.get_md5_dict())

    def test_pare_se_zip(self):
        self._run_axe('-z', '9')
        expected = {
            'pare_se_1_R1.fastq.gz': '4a4dd3598707603b3f76a2378a4504aa',
            'pare_se_2_R1.fastq.gz': '4a4dd3598707603b3f76a2378a4504aa',
            'pare_se_3_R1.fastq.gz': '4a4dd3598707603b3f76a2378a4504aa',
            'pare_se_4_R1.fastq.gz': '96d21b860a0fc70641ea43d350433d11',
            'pare_se_5_R1.fastq.gz': '4a4dd3598707603b3f76a2378a4504aa',
            'pare_se_6_R1.fastq.gz': 'd6044c04f79c358e4a1d443f8828df18',
            'pare_se_7_R1.fastq.gz': '4a4dd3598707603b3f76a2378a4504aa',
            'pare_se_8_R1.fastq.gz': 'da77b8e95827d362a1702ce4fe75c7a9',
            'pare_se_9_R1.fastq.gz': '9c160b0daa0c73e5ef0994206774a5a0',
            'pare_se_unknown_R1.fastq.gz': 'afd5737935814d756e89c365d2d61c7b',
        }
        self.assertDictEqual(expected, self.get_md5_dict())
class TestFakeSE(AxeTest):
    """Single-end runs on the ``fake_<N>mm_R1.fq.gz`` inputs (N = 0, 1, 2).

    The 0mm and 1mm tests expect the digests in ``files``/``zfiles``; the 2mm
    tests carry their own expected digests.
    """

    # Expected digests for the uncompressed 0mm and 1mm runs.
    files = {
        'fake_se_1_R1.fastq': '836eaf06938d4a41122f284ed487a9c7',
        'fake_se_2_R1.fastq': '836eaf06938d4a41122f284ed487a9c7',
        'fake_se_unknown_R1.fastq': '836eaf06938d4a41122f284ed487a9c7',
    }
    # Expected digests for the same runs with '-z 9'.
    zfiles = {
        'fake_se_1_R1.fastq.gz': '3e07353d24a3ecd315067250a6be6047',
        'fake_se_2_R1.fastq.gz': '3e07353d24a3ecd315067250a6be6047',
        'fake_se_unknown_R1.fastq.gz': '3e07353d24a3ecd315067250a6be6047',
    }

    def __init__(self, methodName='runTest'):
        super(TestFakeSE, self).__init__(methodName)
        self.barcodes = path.join(self.data, "fake.barcodes")
        self.outfq = path.join(self.out, "fake_se")
        self.nobcdfq = path.join(self.out, "fake_se_unknown_R1.fastq")

    def _run_axe(self, mm_level, *extra_args):
        """Run axe-demux on the input for ``mm_level``; require success."""
        reads = path.join(self.data, "fake_{}mm_R1.fq.gz".format(mm_level))
        argv = [
            self.axe,
            "-f", reads,
            "-F", self.outfq,
            '-b', self.barcodes,
        ]
        argv.extend(extra_args)
        self.assertTrue(self.run_and_check_stdout(argv))

    def _do_test(self, mm_level):
        """Plain run."""
        self._run_axe(mm_level)

    def _do_test_zip(self, mm_level):
        """Run with '-z 9'."""
        self._run_axe(mm_level, '-z', '9')

    def test_fake_se_0mm(self):
        self._do_test(0)
        self.assertDictEqual(self.files, self.get_md5_dict())

    def test_fake_se_0mm_zip(self):
        self._do_test_zip(0)
        self.assertDictEqual(self.zfiles, self.get_md5_dict())

    def test_fake_se_1mm(self):
        self._do_test(1)
        self.assertDictEqual(self.files, self.get_md5_dict())

    def test_fake_se_1mm_zip(self):
        self._do_test_zip(1)
        self.assertDictEqual(self.zfiles, self.get_md5_dict())

    def test_fake_se_2mm(self):
        self._do_test(2)
        expected = {
            'fake_se_1_R1.fastq': 'd41d8cd98f00b204e9800998ecf8427e',
            'fake_se_2_R1.fastq': 'd41d8cd98f00b204e9800998ecf8427e',
            'fake_se_unknown_R1.fastq': 'a6de105b6c5abbc2d0d16440333adc64',
        }
        self.assertDictEqual(expected, self.get_md5_dict())

    def test_fake_se_2mm_zip(self):
        self._do_test_zip(2)
        expected = {
            'fake_se_1_R1.fastq.gz': '4a4dd3598707603b3f76a2378a4504aa',
            'fake_se_2_R1.fastq.gz': '4a4dd3598707603b3f76a2378a4504aa',
            'fake_se_unknown_R1.fastq.gz': 'ee6979b139dbd898f058fd7649f87da2',
        }
        self.assertDictEqual(expected, self.get_md5_dict())
if __name__ == '__main__':
    # Attach a DEBUG-level stream handler to the "AxeTest" logger so that
    # every message logged by the tests is printed, message text only.
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG)
    console.setFormatter(logging.Formatter('%(message)s'))

    test_logger = logging.getLogger("AxeTest")
    test_logger.addHandler(console)
    test_logger.setLevel(logging.DEBUG)

    unittest.main()
axe-0.3.1/tests/data/ 0000775 0000000 0000000 00000000000 12635670016 0014351 5 ustar 00root root 0000000 0000000 axe-0.3.1/tests/data/fake.barcodes 0000664 0000000 0000000 00000000035 12635670016 0016761 0 ustar 00root root 0000000 0000000 Barcode ID
ATCACG 1
CGATGT 2
axe-0.3.1/tests/data/fake_0mm_R1.fq.gz 0000664 0000000 0000000 00000000360 12635670016 0017340 0 ustar 00root root 0000000 0000000 S fake_0mm_R1.fq Pn08
X@lKZʒq~QNǻ# ^m?oaL A#0pai-Y
UL7;鲪S);$}n/v[ܝ_r)B"Y6 BX[e8
Z$>~][%jT"ÒYOX(^+3QN彫د(
Ձ axe-0.3.1/tests/data/fake_1mm_R1.fq.gz 0000664 0000000 0000000 00000000361 12635670016 0017342 0 ustar 00root root 0000000 0000000 S fake_1mm_R1.fq Pn08
X@lKZʒN~QMǻ#)^m?oaL A#0pai-UL7=6˪^KNcN/7I)7ͽb-"$"%+jӛP.U^ 1IJUVOeZ*2,).ԏ0T(os?9K axe-0.3.1/tests/data/fake_2mm_R1.fq.gz 0000664 0000000 0000000 00000000361 12635670016 0017343 0 ustar 00root root 0000000 0000000 S fake_2mm_R1.fq Pn08
X@lKZʒN~QM;)^m?oaL A#0pai-UL7=6˪^KNqy
b-"$"%+jӛP.U^ 1IJ*Q2-
}BDŀ\Yr*]k'OFqTǜ} axe-0.3.1/tests/data/gbs.barcodes 0000664 0000000 0000000 00000005155 12635670016 0016636 0 ustar 00root root 0000000 0000000 Barcode1 Barcode2 ID
CTCGTGCAG CGAGTGCAG A1
TGCATGCAG TGCATGCAG A2
ACTATGCAG TAGTTGCAG A3
CAGATGCAG TCTGTGCAG A4
AACTTGCAG AGTTTGCAG A5
GCGTTGCAG ACGCTGCAG A6
CGATTGCAG ATCGTGCAG A7
GTAATGCAG TTACTGCAG A8
AGGGTGCAG CCCTTGCAG A9
GATGTGCAG CATCTGCAG A10
TCAGTGCAG CTGATGCAG A11
TGCGATGCAG TCGCATGCAG A12
CGCTTTGCAG AAGCGTGCAG B1
TCACGTGCAG CGTGATGCAG B2
CTAGGTGCAG CCTAGTGCAG B3
ACAAATGCAG TTTGTTGCAG B4
TTCTGTGCAG CAGAATGCAG B5
AGCCGTGCAG CGGCTTGCAG B6
GTATTTGCAG AATACTGCAG B7
CTGTATGCAG TACAGTGCAG B8
ACCGTTGCAG ACGGTTGCAG B9
GCTTATGCAG TAAGCTGCAG B10
GGTGTTGCAG ACACCTGCAG B11
AGGATTGCAG ATCCTTGCAG B12
ATTGATGCAG TCAATTGCAG C1
CATCTTGCAG AGATGTGCAG C2
CCTAGTGCAG CTAGGTGCAG C3
GAGGATGCAG TCCTCTGCAG C4
GGAAGTGCAG CTTCCTGCAG C5
GTCAATGCAG TTGACTGCAG C6
TAATATGCAG TATTATGCAG C7
TACATTGCAG ATGTATGCAG C8
TCGTTTGCAG AACGATGCAG C9
GGTTGTTGCAG ACAACCTGCAG C10
CCAGCTTGCAG AGCTGGTGCAG C11
TTCAGATGCAG TCTGAATGCAG C12
TAGGAATGCAG TTCCTATGCAG D1
GCTCTATGCAG TAGAGCTGCAG D2
CCACAATGCAG TTGTGGTGCAG D3
CTTCCATGCAG TGGAAGTGCAG D4
GAGATATGCAG TATCTCTGCAG D5
ATGCCTTGCAG AGGCATTGCAG D6
AGTGGATGCAG TCCACTTGCAG D7
ACCTAATGCAG TTAGGTTGCAG D8
ATATGTTGCAG ACATATTGCAG D9
ATCGTATGCAG TACGATTGCAG D10
CATCGTTGCAG ACGATGTGCAG D11
CGCGGTTGCAG ACCGCGTGCAG D12
CTATTATGCAG TAATAGTGCAG E1
GCCAGTTGCAG ACTGGCTGCAG E2
GGAAGATGCAG TCTTCCTGCAG E3
GTACTTTGCAG AAGTACTGCAG E4
GTTGAATGCAG TTCAACTGCAG E5
TAACGATGCAG TCGTTATGCAG E6
TGGCTATGCAG TAGCCATGCAG E7
TATTTTTTGCAG AAAAATATGCAG E8
CTTGCTTTGCAG AAGCAAGTGCAG E9
ATGAAAGTGCAG CTTTCATTGCAG E10
AAAAGTTTGCAG AACTTTTTGCAG E11
GAATTCATGCAG TGAATTCTGCAG E12
GAACTTGTGCAG CAAGTTCTGCAG F1
GGACCTATGCAG TAGGTCCTGCAG F2
GTCGATTTGCAG AATCGACTGCAG F3
AACGCCTTGCAG AGGCGTTTGCAG F4
AATATGGTGCAG CCATATTTGCAG F5
ACGTGTTTGCAG AACACGTTGCAG F6
ATTAATTTGCAG AATTAATTGCAG F7
ATTGGATTGCAG ATCCAATTGCAG F8
CATAAGTTGCAG ACTTATGTGCAG F9
CGCTGATTGCAG ATCAGCGTGCAG F10
CGGTAGATGCAG TCTACCGTGCAG F11
CTACGGATGCAG TCCGTAGTGCAG F12
GCGGAATTGCAG ATTCCGCTGCAG G1
TAGCGGATGCAG TCCGCTATGCAG G2
TCGAAGATGCAG TCTTCGATGCAG G3
TCTGTGATGCAG TCACAGATGCAG G4
TGCTGGATGCAG TCCAGCATGCAG G5
ACGACTAGTGCAG CTAGTCGTTGCAG G6
TAGCATGGTGCAG CCATGCTATGCAG G7
TAGGCCATTGCAG ATGGCCTATGCAG G8
TGCAAGGATGCAG TCCTTGCATGCAG G9
TGGTACGTTGCAG ACGTACCATGCAG G10
TCTCAGTGTGCAG CACTGAGATGCAG G11
CCGGATATTGCAG ATATCCGGTGCAG G12
CGCCTTATTGCAG ATAAGGCGTGCAG H1
AACCGAGATGCAG TCTCGGTTTGCAG H2
ACAGGGAATGCAG TTCCCTGTTGCAG H3
ACGTGGTATGCAG TACCACGTTGCAG H4
CCATGGGTTGCAG ACCCATGGTGCAG H5
CGCGGAGATGCAG TCTCCGCGTGCAG H6
CGTGTGGTTGCAG ACCACACGTGCAG H7
GCTGTGGATGCAG TCCACAGCTGCAG H8
GGATTGGTTGCAG ACCAATCCTGCAG H9
GTGAGGGTTGCAG ACCCTCACTGCAG H10
TATCGGGATGCAG TCCCGATATGCAG H11
TTCCTGGATGCAG TCCAGGAATGCAG H12
axe-0.3.1/tests/data/gbs_R1.fastq.gz 0000664 0000000 0000000 00000255460 12635670016 0017161 0 ustar 00root root 0000000 0000000 S gbs_R1.fastq ْF%|O1fEJTn bn0dx TlEV1k/|>KQex]?.|Exuw}\!bpDЇ)EtD|O.W_SQ~o/
*WkahC;M.q|k&4s]Vq}3ƺP7Ut>U={_P`me__e~uҟח^L
CI^pY'D~$}a0M*/_Ve])a#/G+$Df<4}}տ)ēS`{>qk[ec Yyv6L˯GY5(5D^ Eȷ?!$|ҫ'8{z0cia6}cК>ZCۺx#e&91ȿWd_^;QY7%a#DTF˷O(_c=·a B3fW~9v`O:jh!~>|e+d{NN?A\$fSo#G+vk+0N_vƜ\ӛN=zܽ0s[C]Лp*LX~1PKG^/<~9
Q~]i2nٴ6#W^~sb2Q3!;+AeГ7HB;O?/7_?ҋ=}>ݛ9\yuvH`阐=yJ>V}(˿n
q!xhG@#159@6 %/^kF1I'&Dp|D*q/bWY_~6M?p|+/;q
Z5ѨN :ÔH)SNVn2LbXo9RX@3Cl q5b|:k kzK˿W?iDtF\m
td9IMDvDr1$f3\7tQǡq&n8N#Yr?L[
F+}x>)K4n _4}_7w?w1Xlz9X<8PD+4!5Mv6aq"s_G)A{[@Of_c?V?;ZqMa{A+oSW=?=h+an>6In`e;OHaS!+,'6KKiCB
3;ϧ_-8.#pn,~));CvI-zq`%' ܾra6Ǽ|NQfJJ1Pnc_w 0[2] ;@HlhtХ,/_s} O4#+pa+˷d g
Z8r⠟Y?l(&/` oyo8&/g?>l角"#~1K\` x :C|:8.QHSLOxn!s7%K8Uc9@Ob2
vgx(x{6oOӴ^~XOuۨr9pCϐP`OA%k6^
%](p'Ƒ"qE t+6t&~D|D?y>kprC#=kr*o~:!"x_)o1\/-gϪtm(o
{*֏bqL5h+ALwX%|햯I
$K*W~5|88 '[frM} gל}]p9[o>W
{Eؓm<=_8 _"qO[! (|TD}P(?nX3s΄n൮s7=U]^
u.n`)) V+KGj1 G@OJNxyfJJ]}7L1wQ*4?ˣ{1P?ub{;P,l;{
==,z~R!^ dLBɂ㤨Ȁ$&|S2%#Ư,K8L| d|瘿_N䭒+_@x1/