pax_global_header 0000666 0000000 0000000 00000000064 14073306636 0014522 g ustar 00root root 0000000 0000000 52 comment=64c5f3f5b4d14a5c87bbeea860bf8bc6c5930c79
alleleCount-4.3.0/ 0000775 0000000 0000000 00000000000 14073306636 0013775 5 ustar 00root root 0000000 0000000 alleleCount-4.3.0/.gitignore 0000664 0000000 0000000 00000000472 14073306636 0015770 0 ustar 00root root 0000000 0000000 /licence_head.txt
/MANIFEST
/MYMETA.json
/MYMETA.yml
/perl/docs/
/perl/blib/
/perltidy.LOG
/perl/reports
/perl/pm_to_blib
/perl/Makefile
/perl/perltidy.LOG
/perl/MYMETA.json
/perl/MYMETA.yml
/setup.log
/c/bin/*
c/src/*.o
c/tests/*_tests
c/tests/*_tests.dSYM
c/tests/tests_log
install_tmp
install
test.old
test.new
alleleCount-4.3.0/.travis.yml 0000664 0000000 0000000 00000000572 14073306636 0016112 0 ustar 00root root 0000000 0000000 notifications:
# NOTE(review): the value below looks like a Slack integration token stored in
# plaintext — confirm, and consider replacing it with a Travis encrypted
# (`secure:`) value and rotating the token.
slack: wtsi-cgpit:ptUMR1tkNyZJYd9TpGoss8WR
email: false
sudo: false
# NOTE(review): `ruby` is presumably just a placeholder build environment; the
# script below only invokes docker — confirm.
language: ruby
services:
- docker
# CI steps: build the image, check it appears in the local image list, then
# run both command-line entry points with -v inside the container.
script:
- echo 'Build and check docker image'
- docker build -t allelec .
- docker images | grep -c allelec
- echo 'Verify program(s) from this repo'
- docker run -t --rm allelec alleleCounter -v
- docker run -t --rm allelec alleleCounter.pl -v
alleleCount-4.3.0/CHANGES.md 0000664 0000000 0000000 00000003765 14073306636 0015402 0 ustar 00root root 0000000 0000000 # CHANGES
## v4.3.0
* Add script to convert allelecount output to JSON
## v4.2.1
* Update so docker and native install use same install scripts behind the scenes
* Docker now actually has htslib 1.11
* htslib compiled with libdeflate for improved performance
## v4.2.0
* Updated to htslib 1.11
## v4.1.0
* Created Dockerfile and build scripts to generate containerized code
## v4.0.2
* Added checking of iterator error codes when calling sam_itr_next
## v4.0.1
### Behaviour change
**When the proper pair filter flag is used, this code now also checks that the reads are in paired-end (F/R) orientation.**
**This will mean that mate-pair orientation (F/F or R/R) will be rejected**
* Where a proper pair filter is used, now check for the correct paired-end orientation of F/R.
* If this is not met the read is ignored.
## v4.0.0
* alleleCounter now counts **_per-fragment_** rather than per-read when overlaps occur.
* Reworked perl to wrap C alleleCounter and just handle the extra format changes.
* No dep on Bio::DB::HTS now.
* Update to HTSlib 1.7
* Merged #43, providing 10X processing mode.
## v3.3.1
* Fix setup.sh bug skipping samtools install
## v3.3.0
* Added -d commandline option. It triggers 'dense' mode. Best used where there
  are many SNPs, for example AscatNGS and Battenberg allelecount steps
* Added -f commandline option. Flag value of reads to retain in allele counting
* Added -F commandline option. Flag value of reads to exclude in allele counting
## v3.1.0
* Adds filter and keep flags commandline options for read filtering
* Adds dense SNP option
* Change install of Bio::DB::HTS to use fixed version of htslib and Bio::DB::HTS
## v3.0.0
* Removes dependency on legacy versions of samtools in perl code.
* Upgrades to more recent version of htslib not requiring patch.
## v2.2.0
* Added contig filter commandline option
## v2.1.0
* Added version info to makefile and option to display to alleleCount C code.
* Fixed bug in c code where region wasn't malloc-ing enough for the contig name.
alleleCount-4.3.0/Dockerfile 0000664 0000000 0000000 00000005365 14073306636 0016000 0 ustar 00root root 0000000 0000000 FROM ubuntu:20.04 as builder
USER root

# ALL tool versions used by opt-build.sh
# need to keep in sync with setup.sh
ENV VER_HTSLIB="1.11"
ENV VER_LIBDEFLATE="v1.6"

# Refresh the package index and install in a single layer: a cached
# `apt-get update` layer must never be reused with a changed package list.
# One package per line keeps future diffs small.
RUN apt-get -yq update && \
    apt-get install -yq --no-install-recommends \
      build-essential \
      apt-transport-https \
      curl \
      ca-certificates \
      make \
      bzip2 \
      gcc \
      locales \
      wget \
      libtasn1-dev \
      nettle-dev \
      libgmp-dev \
      libp11-kit-dev \
      zlib1g-dev \
      libbz2-dev \
      liblzma-dev \
      libcurl4-gnutls-dev \
      libncurses5-dev

RUN locale-gen en_US.UTF-8
RUN update-locale LANG=en_US.UTF-8

# Install prefix shared by the build scripts. OPT is set in its own ENV
# instruction so that the following instructions can expand it.
ENV OPT=/opt/wtsi-cgp
ENV PATH="$OPT/bin:$OPT/biobambam2/bin:$PATH"
ENV PERL5LIB="$OPT/lib/perl5"
ENV LD_LIBRARY_PATH="$OPT/lib"
ENV LC_ALL=en_US.UTF-8
ENV LANG=en_US.UTF-8

# build tools from other repos
COPY build/opt-build.sh build/
RUN bash build/opt-build.sh $OPT

# build the tools in this repo, separate to reduce build time on errors
COPY . .
RUN bash build/opt-build-local.sh $OPT
# Runtime image: only the run-time libraries plus the tools built in the
# `builder` stage are carried over.
FROM ubuntu:20.04

LABEL maintainer="cgphelp@sanger.ac.uk" \
      uk.ac.sanger.cgp="Cancer, Ageing and Somatic Mutation, Wellcome Trust Sanger Institute" \
      description="alleleCount docker"

# Refresh the package index in the same layer as the install so a cached
# index is never paired with a changed package list. unattended-upgrades is
# installed only to apply pending upgrades once, then removed again.
RUN apt-get -yq update && \
    apt-get install -yq --no-install-recommends \
      apt-transport-https \
      locales \
      curl \
      ca-certificates \
      libperlio-gzip-perl \
      bzip2 \
      psmisc \
      time \
      zlib1g \
      liblzma5 \
      libncurses5 \
      p11-kit \
      unattended-upgrades && \
    unattended-upgrade -d -v && \
    apt-get remove -yq unattended-upgrades && \
    apt-get autoremove -yq

RUN locale-gen en_US.UTF-8
RUN update-locale LANG=en_US.UTF-8

# Same install prefix as the builder stage. OPT is set in its own ENV
# instruction so that the following instructions can expand it.
ENV OPT=/opt/wtsi-cgp
ENV PATH="$OPT/bin:$OPT/biobambam2/bin:$PATH"
ENV PERL5LIB="$OPT/lib/perl5"
ENV LD_LIBRARY_PATH="$OPT/lib"
ENV LC_ALL=en_US.UTF-8
ENV LANG=en_US.UTF-8

# Copy the compiled tool tree out of the builder stage.
RUN mkdir -p $OPT
COPY --from=builder $OPT $OPT

## USER CONFIGURATION
# NOTE(review): `chsh` is given no username here, so it presumably applies to
# the user running this step rather than to `ubuntu` — confirm intent.
RUN adduser --disabled-password --gecos '' ubuntu && chsh -s /bin/bash && mkdir -p /home/ubuntu
USER ubuntu
WORKDIR /home/ubuntu
CMD ["/bin/bash"]
alleleCount-4.3.0/LICENCE 0000664 0000000 0000000 00000103330 14073306636 0014762 0 ustar 00root root 0000000 0000000 GNU AFFERO GENERAL PUBLIC LICENSE
Version 3, 19 November 2007
Copyright (C) 2007 Free Software Foundation, Inc.
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU Affero General Public License is a free, copyleft license for
software and other kinds of works, specifically designed to ensure
cooperation with the community in the case of network server software.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
our General Public Licenses are intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
Developers that use our General Public Licenses protect your rights
with two steps: (1) assert copyright on the software, and (2) offer
you this License which gives you legal permission to copy, distribute
and/or modify the software.
A secondary benefit of defending all users' freedom is that
improvements made in alternate versions of the program, if they
receive widespread use, become available for other developers to
incorporate. Many developers of free software are heartened and
encouraged by the resulting cooperation. However, in the case of
software used on network servers, this result may fail to come about.
The GNU General Public License permits making a modified version and
letting the public access it on a server without ever releasing its
source code to the public.
The GNU Affero General Public License is designed specifically to
ensure that, in such cases, the modified source code becomes available
to the community. It requires the operator of a network server to
provide the source code of the modified version running there to the
users of that server. Therefore, public use of a modified version, on
a publicly accessible server, gives the public access to the source
code of the modified version.
An older license, called the Affero General Public License and
published by Affero, was designed to accomplish similar goals. This is
a different license, not a version of the Affero GPL, but Affero has
released a new version of the Affero GPL which permits relicensing under
this license.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU Affero General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Remote Network Interaction; Use with the GNU General Public License.
Notwithstanding any other provision of this License, if you modify the
Program, your modified version must prominently offer all users
interacting with it remotely through a computer network (if your version
supports such interaction) an opportunity to receive the Corresponding
Source of your version by providing access to the Corresponding Source
from a network server at no charge, through some standard or customary
means of facilitating copying of software. This Corresponding Source
shall include the Corresponding Source for any work covered by version 3
of the GNU General Public License that is incorporated pursuant to the
following paragraph.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the work with which it is combined will remain governed by version
3 of the GNU General Public License.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU Affero General Public License from time to time. Such new versions
will be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU Affero General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU Affero General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU Affero General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If your software can interact with users remotely through a computer
network, you should also make sure that it provides a way for users to
get its source. For example, if your program is a web application, its
interface could display a "Source" link that leads users to an archive
of the code. There are many ways you could offer source, and different
solutions will be better for different programs; see section 13 for the
specific requirements.
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.
alleleCount-4.3.0/README.md 0000664 0000000 0000000 00000012656 14073306636 0015266 0 ustar 00root root 0000000 0000000 # alleleCount
The alleleCount package primarily exists to prevent code duplication between some other projects,
specifically AscatNGS and Battenberg.
[![Quay Badge][quay-status]][quay-repo]
| Master | Develop |
| --------------------------------------------- | ----------------------------------------------- |
| [![Master Badge][travis-master]][travis-base] | [![Develop Badge][travis-develop]][travis-base] |
The project previously contained 2 equivalent implementations of allele counting code in perl and C
for BAM/CRAM processing. As of v4 the perl code wraps the C implementation in order to preserve the
ability to use alleleCounter for those still using the perl implementation whilst using the speed of
the C implementation without losing the additional features it provides.
- [Usage](#usage)
- [C version](#c-version)
- [Perl version](#perl-version)
- [Loci files](#loci-files)
- [Generic loci File](#generic-loci-file)
- [SNP6 loci file (perl only)](#snp6-loci-file-perl-only)
- [Dependencies/Install](#dependenciesinstall)
- [Docker, Singularity and Dockstore](#docker-singularity-and-dockstore)
- [Creating a release](#creating-a-release)
- [Preparation](#preparation)
- [Cutting the release](#cutting-the-release)
- [LICENCE](#licence)
## Usage
Assuming you have added the installation location to your path:
### C version
Accepts only a loci file as described below and generates a tsv output of allele counts.
For parameters please see the command line help:
```
alleleCounter --help
```
Please note use of the long form parameter names with values requires '=', e.g. `--min-base-qual=10`.
### Perl version
The perl version has additional options for alternative types of input/output.
```
alleleCounter.pl --help
```
## Loci files
### Generic loci File
The base input for both tools is a simple tab formatted file of chromosome and 1-based positions, e.g.
```
...
```
If using the `--dense-snps` mode (C only) please ensure the file is sorted via:
```
sort -k1,1 -k2,2n loci_unsrt.tsv > loci_sorted.tsv
```
### SNP6 loci file (perl only)
```
...
```
Output file is different.
### Dependencies/Install
Some of the code included in this package has dependencies:
* [htslib](https://github.com/samtools/htslib)
And various utility perl modules.
These are all installed for you by running:
./setup.sh /some/install/location
Please be aware that this expects basic C compilation libraries and tools to be available.
## Docker, Singularity and Dockstore
There is a pre-built image containing this codebase on quay.io.
* [dockstore-cgpwgs][ds-cgpwgs-git]: Contains additional tools for WGS analysis.
This was primarily designed for use with dockstore.org but can be used as normal containers.
The docker images are known to work correctly after import into a singularity image.
## Creating a release
### Preparation
* Commit/push all relevant changes.
* Pull a clean version of the repo and use this for the following steps.
### Cutting the release
1. Update `perl/lib/Sanger/CGP/AlleleCount.pm` to the correct version.
1. Update `CHANGES.md` to show major items.
1. Run `./prerelease.sh`
1. Check all tests and coverage reports are acceptable.
1. Commit the updated docs tree and updated module/version.
1. Push commits.
1. Use the GitHub tools to draft a release.
## LICENCE
```
Copyright (c) 2014-2020 Genome Research Ltd.
Author: CASM/Cancer IT
This file is part of alleleCount.
alleleCount is free software: you can redistribute it and/or modify it under
the terms of the GNU Affero General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
1. The usage of a range of years within a copyright statement contained within
this distribution should be interpreted as being equivalent to a list of years
including the first and last year specified and all consecutive years between
them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007-
2009, 2011-2012’ should be interpreted as being identical to a statement that
reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright
statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being
identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008,
2009, 2010, 2011, 2012’."
```
[travis-base]: https://travis-ci.org/cancerit/alleleCount
[travis-master]: https://travis-ci.org/cancerit/alleleCount.svg?branch=master
[travis-develop]: https://travis-ci.org/cancerit/alleleCount.svg?branch=dev
[ds-cgpwgs-git]: https://github.com/cancerit/dockstore-cgpwgs
[quay-status]: https://quay.io/repository/wtsicgp/allelecount/status
[quay-repo]: https://quay.io/repository/wtsicgp/allelecount
[quay-builds]: https://quay.io/repository/wtsicgp/allelecount?tab=builds
alleleCount-4.3.0/build/ 0000775 0000000 0000000 00000000000 14073306636 0015074 5 ustar 00root root 0000000 0000000 alleleCount-4.3.0/build/opt-build-local.sh 0000775 0000000 0000000 00000003336 14073306636 0020427 0 ustar 00root root 0000000 0000000 #! /bin/bash
# Builds and installs alleleCounter (C binary and perl wrapper) from the
# current working directory into the installation path given as $1.
# Perl prerequisites are installed with an already-available cpanm.
set -xe

# TMPDIR is tested before `set -u` so that an unset value is not an error.
if [[ -z "${TMPDIR}" ]]; then
  TMPDIR=/tmp
fi

set -u

if [ "$#" -lt "1" ] ; then
  echo "Please provide an installation path such as /opt/ICGC"
  exit 1
fi

# get path to this script
SCRIPT_PATH=$(dirname "$0")
SCRIPT_PATH=$(cd "$SCRIPT_PATH" && pwd)

# get the location to install to (created if missing, then made absolute)
INST_PATH=$1
mkdir -p "$INST_PATH"
INST_PATH=$(cd "$INST_PATH" && pwd)
echo "$INST_PATH"

# get current directory
INIT_DIR=$(pwd)

# Limit parallel compilation to at most 6 jobs.
# The grep runs as the `if` condition so that, under `set -e`, a missing
# /proc/cpuinfo (or no matching lines) selects the single-CPU fallback
# instead of aborting the whole script.
if CPU=$(grep -c ^processor /proc/cpuinfo 2>/dev/null); then
  if [ "$CPU" -gt "6" ]; then
    CPU=6
  fi
else
  CPU=1
fi
echo "Max compilation CPUs set to $CPU"

SETUP_DIR=$INIT_DIR/install_tmp
mkdir -p "$SETUP_DIR/distro" # don't delete the actual distro directory until the very end
mkdir -p "$INST_PATH/bin"
cd "$SETUP_DIR"

# make sure tools installed can see the install loc of libraries
# (`set +u` because the existing search-path variables may be unset; the perl
# one-liner strips the trailing ':' left behind in that case)
set +u
export LD_LIBRARY_PATH=$(echo "$INST_PATH/lib:$LD_LIBRARY_PATH" | perl -pe 's/:$//;')
export PATH=$(echo "$INST_PATH/bin:$PATH" | perl -pe 's/:$//;')
export MANPATH=$(echo "$INST_PATH/man:$INST_PATH/share/man:$MANPATH" | perl -pe 's/:$//;')
export PERL5LIB=$(echo "$INST_PATH/lib/perl5:$PERL5LIB" | perl -pe 's/:$//;')

CPANM=$(which cpanm)
echo "Installing Perl prerequisites ..."
"$CPANM" --no-interactive --notest --mirror http://cpan.metacpan.org -l "$INST_PATH/" --installdeps "$INIT_DIR/perl/." < /dev/null
set -u

### alleleCount
echo "Building alleleCounter ..."
# The marker tested here must be the same file that is touched at the end of
# the block, otherwise a completed build is never detected on a re-run.
if [ ! -e "$SETUP_DIR/alleleCounter.success" ]; then
  #build the C part
  cd "$INIT_DIR"
  mkdir -p "$INIT_DIR/c/bin"
  make -C c clean
  export prefix=$INST_PATH
  make -C c -j$CPU
  cp "$INIT_DIR/c/bin/alleleCounter" "$INST_PATH/bin/."
  #build the perl part
  cd "$INIT_DIR/perl"
  perl Makefile.PL INSTALL_BASE="$INST_PATH"
  make
  make test
  make install
  touch "$SETUP_DIR/alleleCounter.success"
fi
alleleCount-4.3.0/build/opt-build.sh 0000775 0000000 0000000 00000005447 14073306636 0017344 0 ustar 00root root 0000000 0000000 #! /bin/bash
# Installs the third-party prerequisites (cpanminus, base perl modules,
# libdeflate, htslib) into the installation path given as $1.
# Requires VER_LIBDEFLATE and VER_HTSLIB to be set in the environment.
set -xe

# TMPDIR is tested before `set -u` so that an unset value is not an error.
if [[ -z "${TMPDIR}" ]]; then
  TMPDIR=/tmp
fi

set -u

if [ "$#" -lt "1" ] ; then
  echo "Please provide an installation path such as /opt/ICGC"
  exit 1
fi

# get path to this script
SCRIPT_PATH=$(dirname "$0")
SCRIPT_PATH=$(cd "$SCRIPT_PATH" && pwd)

# get the location to install to (created if missing, then made absolute)
INST_PATH=$1
mkdir -p "$INST_PATH"
INST_PATH=$(cd "$INST_PATH" && pwd)
echo "$INST_PATH"

# get current directory
INIT_DIR=$(pwd)

# Limit parallel compilation to at most 6 jobs.
# The grep runs as the `if` condition so that, under `set -e`, a missing
# /proc/cpuinfo (or no matching lines) selects the single-CPU fallback
# instead of aborting the whole script.
if CPU=$(grep -c ^processor /proc/cpuinfo 2>/dev/null); then
  if [ "$CPU" -gt "6" ]; then
    CPU=6
  fi
else
  CPU=1
fi
echo "Max compilation CPUs set to $CPU"

SETUP_DIR=$INIT_DIR/install_tmp
mkdir -p "$SETUP_DIR/distro" # don't delete the actual distro directory until the very end
mkdir -p "$INST_PATH/bin"
cd "$SETUP_DIR"

# make sure tools installed can see the install loc of libraries
# (`set +u` because the existing search-path variables may be unset; the perl
# one-liner strips the trailing ':' left behind in that case)
set +u
export LD_LIBRARY_PATH=$(echo "$INST_PATH/lib:$LD_LIBRARY_PATH" | perl -pe 's/:$//;')
export PATH=$(echo "$INST_PATH/bin:$PATH" | perl -pe 's/:$//;')
export MANPATH=$(echo "$INST_PATH/man:$INST_PATH/share/man:$MANPATH" | perl -pe 's/:$//;')
export PERL5LIB=$(echo "$INST_PATH/lib/perl5:$PERL5LIB" | perl -pe 's/:$//;')
set -u

## INSTALL CPANMINUS
curl -sSL https://cpanmin.us/ > "$SETUP_DIR/cpanm"
perl "$SETUP_DIR/cpanm" --no-wget --no-interactive --notest --mirror http://cpan.metacpan.org -l "$INST_PATH" App::cpanminus
rm -f "$SETUP_DIR/cpanm"

echo "Installing Perl base deps ..."
if [ ! -e "$SETUP_DIR/basePerlDeps.success" ]; then
  perlmods=( "ExtUtils::CBuilder" "Module::Build~0.42" "Const::Fast" "File::Which" "LWP::UserAgent")
  for i in "${perlmods[@]}" ; do
    cpanm --no-interactive --notest --mirror http://cpan.metacpan.org -l "$INST_PATH" "$i"
  done
  touch "$SETUP_DIR/basePerlDeps.success"
fi

## libdeflate
if [ ! -e "$SETUP_DIR/libdeflate.success" ]; then
  # Fail with an explicit message rather than a bare "unbound variable".
  : "${VER_LIBDEFLATE:?VER_LIBDEFLATE must be set to the libdeflate version to download}"
  rm -rf tmp_deflate
  mkdir -p tmp_deflate
  curl -sSL --retry 10 "https://github.com/ebiggers/libdeflate/archive/${VER_LIBDEFLATE}.tar.gz" > distro.tar.gz
  tar --strip-components 1 -C tmp_deflate -zxf distro.tar.gz
  cd tmp_deflate
  PREFIX=$INST_PATH make -j$CPU CFLAGS="-fPIC -O3" install
  cd ../
  rm -rf distro.* tmp_deflate
  touch "$SETUP_DIR/libdeflate.success"
fi

# VER_HTSLIB is used unconditionally below, so validate it up front.
: "${VER_HTSLIB:?VER_HTSLIB must be set to the htslib version to download}"
SOURCE_HTSLIB="https://github.com/samtools/htslib/releases/download/${VER_HTSLIB}/htslib-${VER_HTSLIB}.tar.bz2"

cd "$SETUP_DIR"

echo "Downloading htslib ..."
if [ ! -e "$SETUP_DIR/htslibGet.success" ]; then
  cd "$SETUP_DIR"
  wget "$SOURCE_HTSLIB"
  touch "$SETUP_DIR/htslibGet.success"
fi

echo "Building htslib ..."
if [ ! -e "$SETUP_DIR/htslib.success" ]; then
  mkdir -p htslib
  tar --strip-components 1 -C htslib -jxf "htslib-${VER_HTSLIB}.tar.bz2"
  cd htslib
  ./configure --enable-plugins --enable-libcurl --with-libdeflate --prefix="$INST_PATH" \
    CPPFLAGS="-I$INST_PATH/include" \
    LDFLAGS="-L${INST_PATH}/lib -Wl,-R${INST_PATH}/lib"
  make -j$CPU
  make install
  cd "$SETUP_DIR"
  touch "$SETUP_DIR/htslib.success"
fi
alleleCount-4.3.0/c/ 0000775 0000000 0000000 00000000000 14073306636 0014217 5 ustar 00root root 0000000 0000000 alleleCount-4.3.0/c/Makefile 0000664 0000000 0000000 00000006261 14073306636 0015664 0 ustar 00root root 0000000 0000000 VERSION=$(shell perl -I../perl/lib -MSanger::CGP::AlleleCount -e 'print Sanger::CGP::AlleleCount->VERSION;')
#Compiler
# The package version is baked into the binary as the ALLELECOUNTER_VERSION string macro.
CC = gcc -O3 -DALLELECOUNTER_VERSION='"$(VERSION)"'
#compiler flags
# -g adds debug info to the executable file
# -Wall turns on most warnings from compiler
CFLAGS = -g -Wall
#Location of samtools/htslib libraries
# HTSLOC defaults to the HTSLIB variable; HTSTMP is a scratch directory that
# make_htslib_tmp creates and remove_htslib_tmp deletes.
HTSLOC?=$(HTSLIB)
HTSTMP?=./htslib_tmp
# Installation prefix; its include/ and lib/ directories are added to the search paths below.
prefix?=/usr/local/
#Define locations of header files
OPTINC?=-I$(HTSLOC)/
INCLUDES= -Isrc/ $(OPTINC) -rdynamic
JOIN_INCLUDES= -I$(prefix)/include
CAT_LFLAGS= -L$(prefix)/lib
# define library paths in addition to /usr/lib
# if I wanted to include libraries not in /usr/lib I'd specify
# their path using -Lpath, something like:
LFLAGS?= -L$(HTSTMP)
# define any libraries to link into executable:
# if I want to link in libraries (libx.so or libx.a) I use the -llibname
# option, something like (this will link in libmylib.so and libm.so:
LIBS=-lhts -lpthread -lz -lbz2 -llzma -lm -ldl
# define the C source files
SRCS=./src/bam_access.c
#Define test sources
TEST_SRC=$(wildcard ./tests/*_tests.c)
TESTS=$(patsubst %.c,%,$(TEST_SRC))
# define the C object files
#
# This uses Suffix Replacement within a macro:
# $(name:string1=string2)
# For each word in 'name' replace 'string1' with 'string2'
# Below we are replacing the suffix .c of all words in the macro SRCS
# with the .o suffix
#
MD := mkdir -p
OBJS = $(SRCS:.c=.o)
#Build target executable
COUNTER_TARGET=./bin/alleleCounter
#
# The following part of the makefile is generic; it can be used to
# build any executable just by changing the definitions above and by
# deleting dependencies appended to the file from 'make depend'
#
.PHONY: depend clean coverage test make_htslib_tmp remove_htslib_tmp
.NOTPARALLEL: test
# Default target: clean, create ./bin, link the static htslib into HTSTMP,
# build the binary, run the tests and finally remove HTSTMP again.
# NOTE(review): `clean` is an ordinary prerequisite of `all`; confirm it cannot
# run concurrently with the compile steps when make is invoked with -j.
all: clean make_bin make_htslib_tmp $(COUNTER_TARGET) test remove_htslib_tmp
@echo Binaries have been compiled.
# Link the alleleCounter binary from the object files plus src/alleleCounter.c.
$(COUNTER_TARGET): $(OBJS)
$(CC) $(JOIN_INCLUDES) $(INCLUDES) $(CFLAGS) -o $(COUNTER_TARGET) $(OBJS) $(LFLAGS) $(CAT_LFLAGS) $(LIBS) ./src/alleleCounter.c
#Unit Tests
# For this target CFLAGS is extended with the include, object and library
# flags before the $(TESTS) programs are built; tests/runtests.sh then runs.
test: $(COUNTER_TARGET)
test: CFLAGS += $(JOIN_INCLUDES) $(INCLUDES) $(OBJS) $(LFLAGS) $(LIBS) $(CAT_LFLAGS)
test: $(TESTS)
sh ./tests/runtests.sh
#Unit tests with coverage
coverage: CFLAGS += --coverage
coverage: test
make_bin:
$(MD) ./bin
make_htslib_tmp:
$(MD) $(HTSTMP)
#Do some magic to ensure we compile ALLELECOUNT with the static libhts.a rather than libhts.so
ln -fs $(HTSLOC)/libhts.a $(HTSTMP)/libhts.a
remove_htslib_tmp:
@echo remove tmp hts location
-rm -rf $(HTSTMP)
# Re-runs the default build with VALGRIND set in the environment
# (presumably consumed by tests/runtests.sh - TODO confirm).
valgrind:
VALGRIND="valgrind --log-file=/tmp/valgrind-%p.log" $(MAKE)
# this is a suffix replacement rule for building .o's from .c's
# it uses automatic variables $<: the name of the prerequisite of
# the rule(a .c file) and $@: the name of the target of the rule (a .o file)
# (see the gnu make manual section about automatic variables)
.c.o:
$(CC) $(CFLAGS) $(JOIN_INCLUDES) $(INCLUDES) -c $< -o $@
# Removes objects, binaries, built test programs, the test log and coverage data files.
clean:
$(RM) ./src/*.o *~ $(COUNTER_TARGET) ./bin/* ./tests/tests_log $(TESTS) ./src/*.gcda ./src/*.gcov ./src/*.gcno *.gcda *.gcov *.gcno ./tests/*.gcda ./tests/*.gcov ./tests/*.gcno
depend: $(SRCS)
makedepend $(INCLUDES) $^
# DO NOT DELETE THIS LINE -- make depend needs it
alleleCount-4.3.0/c/src/ 0000775 0000000 0000000 00000000000 14073306636 0015006 5 ustar 00root root 0000000 0000000 alleleCount-4.3.0/c/src/alleleCounter.c 0000664 0000000 0000000 00000037030 14073306636 0017753 0 ustar 00root root 0000000 0000000 /** LICENSE
* Copyright (c) 2014-2020 Genome Research Ltd.
*
* Author: CASM/Cancer IT
*
* This file is part of alleleCount.
*
* alleleCount is free software: you can redistribute it and/or modify it under
* the terms of the GNU Affero General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
* details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include
#include
#include
#include
#include
#include
#include "dbg.h"
// Run-time settings. The values given here are the defaults; they are
// overwritten by alleleCounter_setup_options() when the matching command
// line option is supplied.
// Minimum base quality (-m) and minimum mapping quality (-q).
static int min_base_q = 20;
static int min_map_q = 35;
// Paths taken from -b, -l, -o and -r respectively. They are assigned from
// optarg, i.e. they point into argv and are not heap allocated.
static char *hts_file;
static char *loci_file;
static char *out_file;
static char *ref_file;
// Optional contig name from -c.
static char *contig = NULL;
// Required (-f) and excluded (-F) flag values (3852 = 4+8+256+512+1024+2048).
static int inc_flag = 3; //Paired, proper pair
static int exc_flag = 3852; // Read unmapped, Mate unmapped, Secondary alignment, Fails QC, Duplicate, Supplementary alignment
// Mode switches set to 1 by -s, -x and -d respectively.
static int snp6 = 0;
static int is_10x = 0;
static int is_dense = 0;
/**
 * Test whether a file can be opened for reading.
 *
 * @param fname path to test; NULL is treated as "does not exist".
 * @return 1 when the file could be opened for reading, 0 otherwise.
 */
int check_exist(char *fname){
  FILE *fp;
  // An option that was never supplied leaves its path NULL, and passing
  // NULL to fopen() is undefined behaviour.
  if(fname == NULL){
    return 0;
  }
  if((fp = fopen(fname,"r"))){
    fclose(fp);
    return 1;
  }
  return 0;
}
/**
 * Print the command line help to stdout and terminate the process.
 *
 * The defaults shown are the current values of the option variables.
 *
 * @param exit_code status passed to exit(); this function does not return.
 */
void alleleCounter_print_usage (int exit_code){
  printf ("Usage: alleleCounter -l loci_file.txt -b sample.bam -o output.txt [-m int] [-r ref.fa.fai]\n\n");
  printf (" -l --loci-file [file] Path to loci file.\n");
  printf (" -b --hts-file [file] Path to sample HTS file.\n");
  printf (" -o --output-file [file] Path to write output file.\n\n");
  printf ("Optional\n");
  printf (" -r --ref-file [file] Path to reference fasta index file.\n");
  printf (" NB. If cram format is supplied via -b and the reference listed in the cram header\n");
  printf (" can't be found alleleCounter may fail to work correctly.\n");
  printf (" -m --min-base-qual [int] Minimum base quality [Default: %d].\n",min_base_q);
  printf (" -q --min-map-qual [int] Minimum mapping quality [Default: %d].\n",min_map_q);
  printf (" -c --contig [string] Limit calling to named contig.\n");
  // The two lines below form one sentence; they must stay adjacent.
  printf (" -d --dense-snps Improves performance where many positions are close together \n");
  printf (" by iterating through bam file rather than using a 'fetch' approach.\n");
  printf (" -x --is-10x Enables 10X processing mode.\n");
  printf (" In this mode the HTS input file must be a cellranger produced BAM file. Allele\n");
  printf (" counts are then given on a per-cellular barcode basis, with each count representing\n");
  printf (" the consensus base for that UMI. \n");
  printf (" -f --required-flag [int] Flag value of reads to retain in allele counting default: [%i].\n",inc_flag);
  printf (" N.B. if the proper-pair flag is selected, alleleCounter will assume paired-end\n");
  printf (" and filter out any proper-pair flagged reads not in F/R orientation.\n");
  printf (" -F --filtered-flag [int] Flag value of reads to exclude in allele counting default: [%i].\n",exc_flag);
  printf (" -v --version Display version number.\n");
  printf (" -h --help Display this usage information.\n\n");
  exit(exit_code);
}
/**
 * Write the compiled-in version string, followed by a newline, to stdout
 * and terminate the process with the given status.
 */
void alleleCounter_print_version (int exit_code){
  const char *version = ALLELECOUNTER_VERSION;
  fputs(version, stdout);
  fputc('\n', stdout);
  exit(exit_code);
}
/**
 * Parse the command line into the file-scope option variables and validate
 * the file arguments.
 *
 * Prints the usage and exits (via alleleCounter_print_usage) when an unknown
 * option is seen, when a mandatory option (-l, -b, -o) is missing, or when
 * an input file cannot be opened for reading.
 *
 * @param argc argument count as passed to main().
 * @param argv argument vector as passed to main().
 */
void alleleCounter_setup_options(int argc, char *argv[]){
  ref_file = NULL;
  const struct option long_opts[] =
  {
    {"loci-file", required_argument, 0, 'l'},
    {"hts-file", required_argument, 0, 'b'},
    {"ref-file", required_argument, 0, 'r'},
    {"output-file",required_argument , 0, 'o'},
    {"min-base-qual", required_argument, 0, 'm'},
    {"min-map-qual", required_argument, 0, 'q'},
    // -s and -x are switches. optional_argument lets the bare long form
    // work without swallowing the following option, while still accepting
    // the "--is-10x=1" spelling that required_argument demanded.
    {"is-snp6", optional_argument, 0, 's'},
    {"is-10x", optional_argument, 0, 'x'},
    {"contig", required_argument, 0, 'c'},
    {"dense-snps", no_argument, 0, 'd'},
    {"required-flag", required_argument, 0, 'f'},
    {"filtered-flag", required_argument, 0, 'F'},
    {"version", no_argument, 0, 'v'},
    {"help", no_argument, 0, 'h'},
    { NULL, 0, NULL, 0}
  }; //End of declaring opts

  int index = 0;
  int iarg = 0;

  //Iterate through options
  while((iarg = getopt_long(argc, argv, "f:F:l:b:m:o:q:r:c:hdsvx", long_opts, &index)) != -1){
    switch(iarg){
      case 'h':
        alleleCounter_print_usage(0);
        break;
      case 'v':
        alleleCounter_print_version(0);
        break;
      case 'l':
        loci_file = optarg;
        break;
      case 'm':
        min_base_q = atoi(optarg);
        break;
      case 'r':
        ref_file = optarg;
        break;
      case 'q':
        min_map_q = atoi(optarg);
        break;
      case 'b':
        hts_file = optarg;
        break;
      case 'o':
        out_file = optarg;
        break;
      case 's':
        snp6 = 1;
        break;
      case 'x':
        is_10x = 1;
        break;
      case 'c':
        contig = optarg;
        break;
      case 'd':
        is_dense = 1;
        break;
      case 'f':
        inc_flag = atoi(optarg);
        break;
      case 'F':
        exc_flag = atoi(optarg);
        break;
      case '?':
        alleleCounter_print_usage (1);
        break;
      default:
        alleleCounter_print_usage (1);
    }; // End of args switch statement
  }//End of iteration through options

  // Mandatory options: report them explicitly instead of handing a NULL
  // pointer to printf("%s") / fopen(), which is undefined behaviour.
  if(loci_file == NULL){
    printf("No loci file was provided (-l).\n");
    alleleCounter_print_usage(1);
  }
  if(hts_file == NULL){
    printf("No HTS file was provided (-b).\n");
    alleleCounter_print_usage(1);
  }
  if(out_file == NULL){
    printf("No output file was provided (-o).\n");
    alleleCounter_print_usage(1);
  }

  if(check_exist(loci_file) != 1){
    printf("Loci file %s does not appear to exist.\n",loci_file);
    alleleCounter_print_usage(1);
  }
  if(check_exist(hts_file) != 1){
    printf("HTS file %s does not appear to exist.\n",hts_file);
    alleleCounter_print_usage(1);
  }
  if(ref_file){
    if( check_exist(ref_file) != 1){
      printf("Reference file provided %s does not appear to exist.\n",ref_file);
      alleleCounter_print_usage(1);
    }
  }
  return;
}
/**
 * Write the column header used for standard loci output.
 *
 * @return the value returned by fprintf() (negative on failure).
 */
int print_loci_head(FILE *output){
  static const char header[] = "#CHR\tPOS\tCount_A\tCount_C\tCount_G\tCount_T\tGood_depth\n";
  return fprintf(output, "%s", header);
}
/**
 * Write the column header used for SNP6 output.
 *
 * @return the value returned by fprintf() (negative on failure).
 */
int print_snp6_header(FILE *output){
  static const char header[] = "#CHR\tPOS\tCount_Allele_A\tCount_Allele_B\tGood_depth\n";
  return fprintf(output, "%s", header);
}
/**
 * Write the column header used for 10X (per-barcode) output.
 *
 * @return the value returned by fprintf() (negative on failure).
 */
int print_10x_header(FILE *output){
  static const char header[] = "#CHR\tPOS\tBarcode\tCount_A\tCount_C\tCount_G\tCount_T\tGood_depth\n";
  return fprintf(output, "%s", header);
}
/**
 * Write the header line matching the active output mode.
 *
 * SNP6 mode (the snp6 argument) takes precedence over 10X mode (the
 * file-scope is_10x switch); otherwise the standard loci header is used.
 *
 * @return the result of the selected header writer.
 */
int print_header(FILE *output, int snp6){
  if(snp6 == 1){
    return print_snp6_header(output);
  }
  if(is_10x == 1){
    return print_10x_header(output);
  }
  return print_loci_head(output);
}
/**
 * Select the count belonging to a given allele.
 *
 * @param allele one of 'A', 'C', 'G' or 'T' (upper case only).
 * @return the matching count, or -1 when the allele is not recognised.
 */
int calculateAlleleCount(int a_cnt, int c_cnt, int g_cnt, int t_cnt, int allele){
  if(allele == 'A'){
    return a_cnt;
  }
  if(allele == 'C'){
    return c_cnt;
  }
  if(allele == 'G'){
    return g_cnt;
  }
  if(allele == 'T'){
    return t_cnt;
  }
  return -1;
}
/**
 * Write one tab separated result line for standard loci output:
 * chromosome, position, the A/C/G/T counts and the depth.
 *
 * @return the value returned by fprintf() (negative on failure).
 */
int print_loci_section(FILE *output, char *chr, int pos, int a_cnt, int c_cnt, int g_cnt, int t_cnt, int depth){
  int written;
  assert(output !=NULL);
  written = fprintf(output,"%s\t%d\t%d\t%d\t%d\t%d\t%d\n",chr,pos,a_cnt,c_cnt,g_cnt,t_cnt,depth);
  return written;
}
/**
 * Write one tab separated result line for SNP6 output:
 * chromosome, position, allele A count, allele B count and the depth.
 *
 * @return the value returned by fprintf() (negative on failure).
 */
int print_snp6_section(FILE *output, char *chr, int pos, int allele_a, int allele_b, int depth){
  int written;
  assert(output !=NULL);
  written = fprintf(output,"%s\t%d\t%d\t%d\t%d\n",chr,pos,allele_a,allele_b,depth);
  return written;
}
/**
 * Write one result line in the format of the active mode.
 *
 * In SNP6 mode the A/C/G/T counts are reduced to the counts of allele_A and
 * allele_B; otherwise all four counts are written.
 *
 * @return the result of the underlying writer, or -1 when either allele is
 *         not one of the bases known to calculateAlleleCount().
 */
int print_section(FILE *output, char *chr, int pos, int a_cnt, int c_cnt, int g_cnt,
                  int t_cnt, int depth, int snp6, char allele_A, char allele_B){
  int first_allele_cnt = 0;
  int second_allele_cnt = 0;

  // Standard mode needs no allele lookup.
  if(snp6 != 1){
    return print_loci_section(output, chr, pos, a_cnt, c_cnt, g_cnt,t_cnt, depth);
  }

  first_allele_cnt = calculateAlleleCount(a_cnt, c_cnt, g_cnt, t_cnt, allele_A);
  check(first_allele_cnt>=0,"Error getting A Allele count '%c'",allele_A);
  second_allele_cnt = calculateAlleleCount(a_cnt, c_cnt, g_cnt, t_cnt, allele_B);
  check(second_allele_cnt>=0,"Error getting B Allele count '%c'",allele_B);
  return print_snp6_section(output, chr, pos, first_allele_cnt, second_allele_cnt, depth);

error:
  return -1;
}
/**
 * Parse one line of a loci file into a loci_stats record.
 *
 * The chromosome is first tried as an integer and, failing that, as a
 * whitespace delimited string. In SNP6 mode the two alleles found after two
 * skipped columns are read as well.
 *
 * @param line  the text of the line to parse.
 * @param stats record to fill; stats->chr must already point at a buffer
 *              large enough for the chromosome name.
 * @param snp6  1 to parse the SNP6 layout, anything else for chr/pos only.
 * @param i     line number, used in error messages only.
 * @return 0 on success, -1 when the line cannot be parsed.
 */
int get_position_info_from_file(char *line, loci_stats *stats, int snp6, int i){
  int chr_d = 0;
  if(snp6==1){
    // Four conversions (chr, pos, allele A, allele B) are requested, so a
    // fully parsed line yields 4, not 2.
    int chk = sscanf(line,"%d%*[ \t]%d%*[ \t]%*s%*[ \t]%*s%*[ \t]%c%*[ \t]%c",&chr_d,&(stats->pos),&(stats->allele_A),&(stats->allele_B));
    if(chk == 4){
      int try = sprintf(stats->chr,"%d",chr_d);
      check(try >0,"Error trying to convert chromosome name '%d'to string.",chr_d);
    }else{
      //Try again but a string match
      chk = sscanf(line,"%s%*[ \t]%d%*[ \t]%*s%*[ \t]%*s%*[ \t]%c%*[ \t]%c",stats->chr,&(stats->pos),&(stats->allele_A),&(stats->allele_B));
      check(chk==4,"Error attempting string match of allele position info from SNP6 line %s.",line);
    }
    check(chk==4,"Error parsing SNP6 file line number %d: '%s'.",i,line);
  }else{
    int chk = sscanf(line,"%d%*[ \t]%d",&chr_d,&(stats->pos));
    if(chk == 2){
      int try = sprintf(stats->chr,"%d",chr_d);
      check(try >0,"Error trying to convert chromosome name '%d'to string.",chr_d);
    }else{
      //Try again but a string match
      chk = sscanf(line,"%s%*[ \t]%d",stats->chr,&(stats->pos));
      check(chk==2,"Error parsing loci file line number %d as a string match: '%s'.",i,line);
    }
    check(chk==2,"Error parsing loci file line number %d: '%s'.",i,line);
  }
  return 0;
error:
  return -1;
}
/**
 * Count the lines of a file by counting successful fgets() reads into a
 * 5000 byte buffer.
 *
 * @param file_path path of the file to read.
 * @return the number of reads performed, or -1 when the file cannot be opened.
 */
int line_count (char *file_path){
  int total = 0;
  FILE *fh = fopen(file_path,"r");
  check(fh != NULL, "Error opening file '%s' to count lines.",file_path);
  char chunk[ 5000 ];
  for(;;){
    if(fgets(chunk, sizeof(chunk), fh) == NULL){
      break;
    }
    total++;
  }
  fclose(fh);
  return total;
error:
  if(fh) fclose(fh);
  return -1;
}
int sort_loci_stats(const void *a1, const void *b1){
loci_stats *a = *(loci_stats * const *)a1;
loci_stats *b = *(loci_stats * const *)b1;
int res = strcmp(a->chr,b->chr);
if(res==0){
if(a->pos == b->pos){
return 0;
}else{
return a->pos < b->pos ? -1 : 1;
}
}else{
return res;
}
}
/**
 * Allocate the four element base count array of a record and set every
 * element to zero.
 *
 * @return 0 on success, 1 when the allocation fails.
 */
int init_base_counts(loci_stats *stats){
  int idx;
  stats->base_counts = malloc(sizeof(int) * 4);
  check_mem(stats->base_counts);
  for(idx=0; idx<4; idx++){
    stats->base_counts[idx] = 0;
  }
  return 0;
error:
  return 1;
}
/**
 * Read every line of a loci file into a sorted array of loci_stats records.
 *
 * @param loci_file path of the loci file.
 * @param line_cnt  out: number of records in the returned array.
 * @return a newly allocated array of records sorted with sort_loci_stats(),
 *         or NULL on any error (everything allocated here is released).
 */
loci_stats ** read_locis_from_file(char *loci_file, int *line_cnt){
  FILE *loci_in = NULL;
  loci_stats **stats= NULL;
  int i=0;
  char line[2048];

  *line_cnt = line_count(loci_file);
  check(*line_cnt>=0,"Error counting lines in loci file: %s",loci_file);
  // Zero-initialised so that slots which were never filled are NULL and the
  // error path can tell them apart; at least one slot so an empty file does
  // not depend on what a zero sized allocation returns.
  stats = calloc((*line_cnt > 0 ? (size_t)*line_cnt : 1), sizeof(loci_stats*));
  check_mem(stats);

  //Open loci file
  loci_in = fopen(loci_file,"r");
  check(loci_in != NULL, "Error opening loci file %s for reading.",loci_file);

  while ( fgets(line,sizeof(line),loci_in) != NULL ){
    // line_count() reads with a larger buffer than this loop, so an over-long
    // line produces more reads here than slots were allocated.
    check(i < *line_cnt,"Loci file %s has more records than expected at line %d; lines must be shorter than %d characters.",loci_file,i,(int)sizeof(line));
    stats[i] = malloc(sizeof(loci_stats));
    check_mem(stats[i]);
    // Initialise both owned pointers before anything can fail.
    stats[i]->chr = NULL;
    stats[i]->base_counts = NULL;
    stats[i]->chr = malloc(sizeof(char)*2048);
    check_mem(stats[i]->chr);
    int rc = get_position_info_from_file(line,stats[i],snp6,i);
    check(rc==0,"Error trying to fetch position from file at line %d.",i);
    rc = init_base_counts(stats[i]);
    check(rc==0,"Error initialising base counts %d.",i);
    i++;
  }

  // Report and sort exactly the records that were read.
  *line_cnt = i;
  qsort(stats,i,sizeof(loci_stats*),&sort_loci_stats);
  fclose(loci_in);
  return stats;

error:
  if(stats) {
    int j=0;
    for(j=0;j<*line_cnt;j++){
      if(stats[j]){
        free(stats[j]->chr);
        free(stats[j]->base_counts);
        free(stats[j]);
      }
    }
    free(stats);
  }
  if(loci_in) fclose(loci_in);
  return NULL;
}
int main(int argc, char *argv[]){
loci_stats **locis = NULL;
//Get the options commandline
alleleCounter_setup_options(argc,argv);
//Set the min base and mapping quality.
bam_access_min_base_qual(min_base_q);
bam_access_min_map_qual(min_map_q);
bam_access_inc_flag(inc_flag);
bam_access_exc_flag(exc_flag);
//Open output file for writing
FILE *output = fopen(out_file,"w");
check(output != NULL, "Error opening file %s for write.",out_file);
int chk = print_header(output,snp6);
check(chk >= 0,"Error trying to write header '%s'.",out_file);
//Open bam file and iterate through chunks until we reach the cutoff.
chk = -1;
chk = bam_access_openhts(hts_file,ref_file);
check(chk == 0,"Error trying to open sequence/index files '%s'.",hts_file);
int loci_count=0;
fprintf(stderr,"Reading locis\n");
locis = read_locis_from_file(loci_file,&loci_count);
fprintf(stderr,"Done reading locis\n");
check(locis!=NULL,"Error reading loci_stats from file.");
if(is_10x){
fprintf(stderr,"Using 10X processing mode.\n");
if(!is_dense){
int j=0;
for(j=0;jchr,locis[j]->pos,locis[j],is_10x,output);
check(ret==0,"Error retrieving stats from bam file for position %s:%d",locis[j]->chr,locis[j]->pos);
free(locis[j]->chr);
if(locis[j]->base_counts) free(locis[j]->base_counts);
free(locis[j]);
}
}else{
fprintf(stderr,"Multi pos start:\n");
int ret = bam_access_get_multi_position_base_counts(locis, loci_count,is_10x,output);
check(ret==0,"Error scanning through bam file for loci list with dense snps.");
}
}else{
if(is_dense){
fprintf(stderr,"Multi pos start:\n");
int ret = bam_access_get_multi_position_base_counts(locis, loci_count,is_10x,output);
check(ret==0,"Error scanning through bam file for loci list with dense snps.");
int j=0;
for(j=0;jbase_counts[0]+locis[j]->base_counts[1]+locis[j]->base_counts[2]+locis[j]->base_counts[3];
int check_print = print_section(output,locis[j]->chr,locis[j]->pos,locis[j]->base_counts[0],
locis[j]->base_counts[1],locis[j]->base_counts[2],locis[j]->base_counts[3],depth,
snp6,locis[j]->allele_A,locis[j]->allele_B);
check(check_print>0,"Error printing line to output file: %s: %d.",locis[j]->chr,locis[j]->pos);
free(locis[j]->chr);
if(locis[j]->base_counts) free(locis[j]->base_counts);
free(locis[j]);
}
}else{
int j=0;
for(j=0;jchr,locis[j]->pos,locis[j],is_10x,output);
check(ret==0,"Error retrieving stats from bam file for position %s:%d",locis[j]->chr,locis[j]->pos);
int depth = locis[j]->base_counts[0]+locis[j]->base_counts[1]+locis[j]->base_counts[2]+locis[j]->base_counts[3];
int check_print = print_section(output,locis[j]->chr,locis[j]->pos,locis[j]->base_counts[0],
locis[j]->base_counts[1],locis[j]->base_counts[2],locis[j]->base_counts[3],depth,
snp6,locis[j]->allele_A,locis[j]->allele_B);
check(check_print>0,"Error printing line to output file: %s: %d.",locis[j]->chr,locis[j]->pos);
free(locis[j]->chr);
if(locis[j]->base_counts) free(locis[j]->base_counts);
free(locis[j]);
}
free(locis);
}
}
//Close files.
//fclose(loci_in);
bam_access_closehts();
fclose(output);
return 0;
error:
bam_access_closehts();
if(locis){
int j=0;
for(j=0;jchr);
if(locis[j]->base_counts) free(locis[j]->base_counts);
free(locis[j]);
}
}
free(locis);
}
if(output) fclose(output);
if(hts_file) free(hts_file);
if(out_file) free(out_file);
if(loci_file) free(loci_file);
return 1;
}
alleleCount-4.3.0/c/src/bam_access.c 0000664 0000000 0000000 00000035240 14073306636 0017236 0 ustar 00root root 0000000 0000000 /** LICENSE
* Copyright (c) 2014-2020 Genome Research Ltd.
*
* Author: CASM/Cancer IT
*
* This file is part of alleleCount.
*
* alleleCount is free software: you can redistribute it and/or modify it under
* the terms of the GNU Affero General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
* details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include
#include
#include
#include
#include
#include
#include
#include "khash.h"
//Largest power-of-ten accumulator value that no_of_digits() may still multiply by 10 without exceeding INT_MAX
#define PO10_LIMIT (INT_MAX/10)
//Instantiates the khash table type named "strh" (string keys, uint8_t values per the macro arguments);
//pileupCounts() uses it to remember the base code already seen for each read name
KHASH_MAP_INIT_STR(strh,uint8_t)
//Handles for the currently open alignment file: allocated by bam_access_openhts(), released by bam_access_closehts()
file_holder *fholder;
//NOTE(review): counter and the three include_* switches are not referenced in the visible part of this file -
//confirm whether any other translation unit relies on them before removing
int counter = -1;
int include_sw = 0;
int include_dup = 0;
int include_se = 0;
//Pileup bases with a quality below this are not counted (changed via bam_access_min_base_qual)
int min_base_qual = 20;
//Reads with a mapping quality below this are skipped (changed via bam_access_min_map_qual)
int min_map_qual = 35;
//Bits that must ALL be set in a read's flag for it to be used; 3 = bits 1|2
//(the code tests this value against BAM_FPROPER_PAIR to decide on the orientation check)
int inc_flag = 3;
//A read is skipped when ANY of these flag bits is set; 3852 = 4|8|256|512|1024|2048
//NOTE(review): in the SAM flag convention these look like unmapped, mate unmapped, secondary, QC fail,
//duplicate and supplementary - confirm against htslib's BAM_F* constants
int exc_flag = 3852;
int maxitercnt = 1000000000; //Override the pileup's internal maxcnt (passed to bam_plp_set_maxcnt)
//Make sure this isn't too close to the integer overflow boundary
//int maxitercnt = 100000;
//Empty placeholder type, not referenced in the visible code.
//NOTE(review): a member-less struct is a compiler extension in C - confirm it is still needed
typedef struct {
} plp_aux_t;
/**
 * Write one tab-separated 10x result row to `output`.
 * Column order: chr, pos, barcode, A count, C count, G count, T count, depth,
 * terminated by a newline.
 * @return whatever fprintf returns (characters written, negative on failure).
 */
int print_10x_section(FILE *output, char *chr, int pos, int a_cnt, int c_cnt, int g_cnt, int t_cnt, int depth,char *barcode){
    int written;
    assert(output !=NULL);
    written = fprintf(output,"%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n",chr,pos,barcode,a_cnt,c_cnt,g_cnt,t_cnt,depth);
    return written;
}
/**
 * Open an alignment file, load its index and read its header into the
 * file-level `fholder`.
 *
 * @param hts_file path of the SAM/BAM/CRAM file to open (must not be NULL).
 * @param ref_file optional reference path handed to hts_set_fai_filename;
 *                 may be NULL, in which case a warning is logged for CRAM input.
 * @return 0 on success, -1 on failure. On failure everything opened so far is
 *         released and `fholder` is reset to NULL, so a following
 *         bam_access_closehts() call is harmless.
 */
int bam_access_openhts(char *hts_file, char *ref_file){
    assert(hts_file != NULL);
    //Zero-initialise the holder so the error path can tell which members were really opened
    fholder = calloc(1, sizeof(file_holder));
    check_mem(fholder);
    //Beginning and end of tmp struct for bam access
    fholder->beg = 0; fholder->end = 0x7fffffff;  // The max 32 bit integer.
    //Open a file for read from compressed bam.
    fholder->in = hts_open(hts_file, "r");
    check(fholder->in != 0,"HTS file %s failed to open.",hts_file);
    fholder->idx = sam_index_load(fholder->in,hts_file);
    check(fholder->idx != 0,"HTS index file for %s failed to open.",hts_file);
    if(ref_file){
        int chk = hts_set_fai_filename(fholder->in, ref_file);
        check(chk==0,"Error setting fai filename %s.",ref_file);
    }else{
        if(fholder->in->format.format == cram) log_warn("No reference file provided for a cram input file, if the reference described in the cram header can't be located this script may fail.");
    }
    //Check for generic header read method.
    fholder->head = sam_hdr_read(fholder->in);
    //Every later region query needs the header, so fail here rather than crash there
    check(fholder->head != 0,"Error reading header from HTS file %s.",hts_file);
    return 0;
error:
    //fholder itself may be NULL when the allocation failed
    if(fholder){
        if(fholder->idx) hts_idx_destroy(fholder->idx);
        if(fholder->in) hts_close(fholder->in);
        if(fholder->head) bam_hdr_destroy(fholder->head);
        free(fholder);
        //Prevent a double free when the caller's own error path calls bam_access_closehts()
        fholder = NULL;
    }
    return -1;
}
/**
 * Release the index, file handle and header held by the file-level `fholder`
 * and the holder itself. Safe to call when nothing is open and safe to call
 * more than once: the pointer is reset after the first release.
 */
void bam_access_closehts(){
    if(fholder == NULL) return;
    if(fholder->idx) hts_idx_destroy(fholder->idx);
    if(fholder->in) hts_close(fholder->in);
    if(fholder->head) bam_hdr_destroy(fholder->head);
    free(fholder);
    //Without this a second call would read and free released memory
    fholder = NULL;
    return;
}
/**
 * Number of decimal digits needed to print the magnitude of `i`
 * (the minus sign of a negative value is not counted; 0 needs one digit).
 *
 * The magnitude is computed in unsigned arithmetic so that INT_MIN, whose
 * negation does not fit in an int, is handled without undefined behaviour.
 */
int no_of_digits(int i){
    unsigned int magnitude = (i < 0) ? 0u - (unsigned int)i : (unsigned int)i;
    int n = 1;
    //Dividing down cannot overflow, unlike growing a power of ten up towards INT_MAX
    while(magnitude >= 10u){
        magnitude /= 10u;
        n++;
    }
    return n;
}
//Callback handed to bam_plp_init. It ignores both arguments and always
//returns 0; reads are supplied to the pileup with bam_plp_push instead.
static int pileup_func(void *data, bam1_t *b){
    (void)data;
    (void)b;
    return 0;
}
/**
 * Add the bases of one pileup column to stats->base_counts
 * (index 0=A, 1=C, 2=G, 3=T; any other base code is ignored).
 *
 * A base is counted when it is not a deletion and its quality is at least
 * min_base_qual. The first base code seen for each read name is remembered;
 * a later entry with the same read name (an overlapping mate) is only
 * counted when its base differs from that remembered one.
 * NOTE(review): the remembered base is recorded even when that first entry
 * itself fails the quality/deletion test - confirm this is intended.
 *
 * @param pil    pileup entries of the column.
 * @param n_plp  number of entries in `pil`.
 * @param stats  locus whose base_counts are incremented.
 */
void pileupCounts(const bam_pileup1_t *pil, int n_plp, loci_stats *stats){
    khash_t(strh) *seen = kh_init(strh);
    int i;
    if(seen == NULL){
        log_err("Out of memory.");
        return;
    }
    for(i=0;i<n_plp;i++){
        const bam_pileup1_t *p = pil + i;
        int qual = bam_get_qual(p->b)[p->qpos];
        uint8_t c = bam_seqi(bam_get_seq(p->b), p->qpos);
        uint8_t pre_b = 0;
        int absent = 0;
        //kh_put hands back the slot of the existing entry when the name is already present
        khiter_t k = kh_put(strh, seen, bam_get_qname(p->b), &absent);
        if(absent < 0){
            //Insertion failed: k is not a valid slot, so stop rather than write out of bounds
            log_err("Out of memory.");
            break;
        }
        if(absent){
            //First time this read name is seen: remember its base
            kh_value(seen, k) = c;
        }else{
            //Read already processed: fetch the base recorded for it
            pre_b = kh_val(seen, k);
        }
        if(p->is_del || qual < min_base_qual) continue;
        //Overlapping mates only add a count when they disagree
        if(!absent && pre_b == c) continue;
        switch(c){
            case 1: /*A*/
                stats->base_counts[0]++;
                break;
            case 2: /*C*/
                stats->base_counts[1]++;
                break;
            case 4: /*G*/
                stats->base_counts[2]++;
                break;
            case 8: /*T*/
                stats->base_counts[3]++;
                break;
            default:
                break;
        }
    }
    kh_destroy(strh, seen);
    return;
}
//qsort comparator for bam_pileup1_t entries: orders by the string value of
//the "CB" aux tag and, when those are equal, by the "UB" aux tag (strcmp order).
//NOTE(review): neither tag lookup is checked for NULL here; callers appear to
//push only reads that carry both tags - confirm before reusing elsewhere.
int readCompare(const void *r1,const void *r2){
    const bam_pileup1_t *lhs = (const bam_pileup1_t *)r1;
    const bam_pileup1_t *rhs = (const bam_pileup1_t *)r2;
    const char *lhs_barcode = bam_aux2Z(bam_aux_get(lhs->b,"CB"));
    const char *rhs_barcode = bam_aux2Z(bam_aux_get(rhs->b,"CB"));
    int order = strcmp(lhs_barcode,rhs_barcode);
    if(order != 0){
        return order;
    }
    //Same barcode, so the UMI decides
    const char *lhs_umi = bam_aux2Z(bam_aux_get(lhs->b,"UB"));
    const char *rhs_umi = bam_aux2Z(bam_aux_get(rhs->b,"UB"));
    return strcmp(lhs_umi,rhs_umi);
}
//Index (0=A,1=C,2=G,3=T) of the strictly most frequent base in cnts,
//or -1 when the maximum is shared (which includes the all-zero case).
static int consensus_base_index(const int cnts[4]){
    int best = -1;
    int max_obs_reads = -1;
    int k;
    for(k=0;k<4;k++){
        if(cnts[k] == max_obs_reads){
            best = -1; //tie with the current maximum
        }else if(cnts[k] > max_obs_reads){
            max_obs_reads = cnts[k];
            best = k;
        }
    }
    return best;
}

//Print the row for one cell barcode, but only when at least one UMI gave a consensus base.
static void emit_cell_counts(FILE *output, loci_stats *stats, const int cellCnts[4], char *barcode){
    int depth = cellCnts[0]+cellCnts[1]+cellCnts[2]+cellCnts[3];
    if(depth>0)
        print_10x_section(output,stats->chr,stats->pos,cellCnts[0],cellCnts[1],cellCnts[2],cellCnts[3],depth,barcode);
}

/**
 * 10x counting for one pileup column: reads are sorted by cell barcode ("CB")
 * and UMI ("UB"), each (barcode, UMI) group votes for one consensus base, and
 * one row per barcode is written to `output` with the number of UMIs that
 * supported each base.
 *
 * Deletions and bases below min_base_qual are skipped. A UMI group whose most
 * frequent base is tied contributes nothing.
 *
 * A UMI group is closed when the UMI OR the barcode changes. Closing it only
 * on a UMI change would let two cells that share a UMI string merge their
 * reads and drop the molecule from the first cell.
 *
 * @param pil    pileup entries of the column; sorted in place.
 * @param n_plp  number of entries in `pil`.
 * @param stats  locus being processed (chr and pos are used for the output rows).
 * @param output stream the per-barcode rows are written to.
 */
void pileupCounts10x(const bam_pileup1_t *pil, int n_plp, loci_stats *stats,FILE *output){
    int i,j;
    char *curr_barcode = NULL;
    char *curr_umi = NULL;
    int cnts[4] = {0};
    int cellCnts[4] = {0};
    //Make a non-constant pointer to reads so they can be sorted in place
    bam_pileup1_t *p = (bam_pileup1_t *)pil;
    qsort(p,n_plp,sizeof(bam_pileup1_t),readCompare);
    printf("Performing pileup of %d reads at %s %d\n",n_plp,stats->chr,stats->pos);
    //Loop over sorted reads
    for(i=0;i<n_plp;i++,p++){
        int qual = bam_get_qual(p->b)[p->qpos];
        uint8_t c = bam_seqi(bam_get_seq(p->b), p->qpos);
        char *barcode = bam_aux2Z(bam_aux_get(p->b,"CB"));
        char *umi = bam_aux2Z(bam_aux_get(p->b,"UB"));
        int barcode_changed;
        //Skip this read?
        if((p->is_del) || qual < min_base_qual) continue;
        //First usable read, so initialise current barcode/umi
        if(curr_umi==NULL && curr_barcode==NULL){
            curr_barcode = barcode;
            curr_umi = umi;
        }
        barcode_changed = strcmp(barcode,curr_barcode)!=0;
        //A new barcode always starts a new molecule, even if the UMI string repeats
        if(barcode_changed || strcmp(umi,curr_umi)!=0){
            j = consensus_base_index(cnts);
            if(j>=0) cellCnts[j]++;
            //Re-zero and store new current UMI
            cnts[0]=cnts[1]=cnts[2]=cnts[3]=0;
            curr_umi = umi;
        }
        if(barcode_changed){
            //Print the finished cell, then start counting the new one
            emit_cell_counts(output,stats,cellCnts,curr_barcode);
            cellCnts[0]=cellCnts[1]=cellCnts[2]=cellCnts[3]=0;
            curr_barcode = barcode;
        }
        //Add the count to the lowest level counter
        switch(c){
            case 1:
                cnts[0]++;
                break;
            case 2:
                cnts[1]++;
                break;
            case 4:
                cnts[2]++;
                break;
            case 8:
                cnts[3]++;
                break;
            default:
                break;
        }
    }
    //Now finalise the last UMI and the last cell
    if(n_plp>0){
        j = consensus_base_index(cnts);
        if(j>=0) cellCnts[j]++;
        emit_cell_counts(output,stats,cellCnts,curr_barcode);
    }
    return;
}
int bam_access_get_multi_position_base_counts(loci_stats **stats, int stats_count,int is_10x,FILE* output){
char *region = NULL;
hts_itr_t *iter = NULL;
bam1_t* b = NULL;
bam_plp_t buf;
//Find start and stop for each contig and retrieve a contig at once
int start = 0;
int stop = 0;
char* this_chr;
int stop_idx = 0;
int start_idx = 0;
while(start_idxpos;
stop_idx = i;
this_chr = stats[start_idx]->chr;
start = stats[start_idx]->pos;
if(i+1chr)==0){
stop = stats[i]->pos;
stop_idx = i;
i++;
if(i==stats_count) break;
}
}
region = malloc((sizeof(char *) * (strlen(this_chr)+1))+sizeof(":")+sizeof("-")+(sizeof(char)*((no_of_digits(start)+no_of_digits(stop))+1)));
check_mem(region);
sprintf(region,"%s:%d-%d",this_chr,start,stop);
// initialize pileup
buf = bam_plp_init(pileup_func, (void *)fholder);
bam_plp_set_maxcnt(buf,maxitercnt);
b = bam_init1();
iter = sam_itr_querys(fholder->idx, fholder->head, region);
int j=start_idx;
int result;
const bam_pileup1_t *pl;
int tid, pos, n_plp = -1;
while ((result = sam_itr_next(fholder->in, iter, b)) >= 0) {
uint8_t *aux_val_bcode;
uint8_t *aux_val_umi;
//printf("Got another read \n");
if(b->core.qual < min_map_qual || (b->core.flag & exc_flag) || (b->core.flag & inc_flag) != inc_flag) continue;
//Additional check for properly paired reads - they must be in correct paired end orientation
if(inc_flag & BAM_FPROPER_PAIR){
if ((!(b->core.flag & BAM_FMREVERSE) == !(b->core.flag & BAM_FREVERSE))) continue;
}
//Extract 10x checks
if(is_10x){
aux_val_bcode = bam_aux_get(b,"CB");
aux_val_umi = bam_aux_get(b,"UB");
if(!aux_val_bcode || !aux_val_umi)
continue;
}
//printf("Which passed quality checks.\n");
bam_plp_push(buf, b);
//printf("And we pushed it to the buffer.\n");
while ((pl=bam_plp_next(buf, &tid, &pos, &n_plp)) > 0) {
//printf("Processing pileup at %d stats at %d\n",pos,stats[j]->pos);
if(j==stats_count || pos+1>stats[stop_idx]->pos) break;
while(pos+1>stats[j]->pos){
if(j==stop_idx) break;
j++;//WE've finished this position, move on (no cvg?)
}
if(pos+1==stats[j]->pos){
//printf("Doing inner pileup for %d reads.\n",n_plp);
if(is_10x){
pileupCounts10x(pl, n_plp, stats[j],output);
}else{
pileupCounts(pl, n_plp, stats[j]);
}
}
//printf("Processing EOL pileup at %d stats at %d\n",pos,stats[j]->pos);
if(pos+1>=stats[j]->pos && j==stop_idx) break;
}
//printf("Returning to read loading.\n");
}//End of iteration through sam_iter
check(result>=-1, "Error detected (%d) when trying to iterate through region.",result);
bam_plp_push(buf, 0); // finalize pileup
while ((pl=bam_plp_next(buf, &tid, &pos, &n_plp)) > 0) {
if(j==stats_count || pos+1>stats[stop_idx]->pos) break;
while(pos+1>stats[j]->pos){
if(j==stop_idx) break;
j++;//WE've finished this position, move on (no cvg?)
}
if(pos+1==stats[j]->pos){
//printf("Doing final pileup for %d reads.\n",n_plp);
if(is_10x){
pileupCounts10x(pl, n_plp, stats[j],output);
}else{
pileupCounts(pl, n_plp, stats[j]);
}
}
if(pos+1>=stats[j]->pos && j==stop_idx) break;
}
bam_plp_destroy(buf);
free(region);
bam_destroy1(b);
start_idx = stop_idx+1;
}
return 0;
error:
if(iter) sam_itr_destroy(iter);
if(b) bam_destroy1(b);
if(region) free(region);
return 1;
}
/**
 * Count the bases (or, in 10x mode, emit per-barcode rows) at a single
 * position by running a pileup over the one-base region chr:posn-posn.
 *
 * Read filtering matches the multi-position path: mapping quality, include /
 * exclude flags and, when the include flags request proper pairs, the
 * opposite-strand orientation check.
 *
 * @param chr     contig name.
 * @param posn    1-based position to count.
 * @param stats   locus record to fill; stays owned by the caller.
 * @param is_10x  non-zero to require CB/UB tags and emit per-barcode rows.
 * @param output  stream used for the 10x rows.
 * @return 0 on success, 1 on failure. `stats` is never freed here: the caller
 *         still reads it for its error message and releases it itself.
 */
int bam_access_get_position_base_counts(char *chr, int posn, loci_stats *stats,int is_10x,FILE *output){
    char *region = NULL;
    hts_itr_t *iter = NULL;
    bam1_t* b = NULL;
    bam_plp_t buf = NULL;
    size_t region_len;
    int result;
    int tid, pos, n_plp = -1;
    const bam_pileup1_t *pil;

    fholder->stats = stats;

    //name + ':' + posn + '-' + posn + NUL, plus room for two signs
    region_len = strlen(chr) + (size_t)(no_of_digits(posn)*2) + 5;
    region = malloc(region_len);
    check_mem(region);
    snprintf(region,region_len,"%s:%d-%d",chr,posn,posn);
    fholder->beg = posn;
    fholder->end = posn;

    // initialize pileup
    buf = bam_plp_init(pileup_func, (void *)fholder);
    check_mem(buf);
    bam_plp_set_maxcnt(buf,maxitercnt);

    //Iterator based fetch for htslib compatibility.
    b = bam_init1();
    check_mem(b);
    iter = sam_itr_querys(fholder->idx, fholder->head, region);
    check(iter != NULL,"Error creating iterator for region %s.",region);

    while ((result = sam_itr_next(fholder->in, iter, b)) >= 0) {
        //10x mode needs both the cell barcode and the UMI tag
        if(is_10x){
            uint8_t *aux_val_bcode = bam_aux_get(b,"CB");
            uint8_t *aux_val_umi = bam_aux_get(b,"UB");
            if(!aux_val_bcode || !aux_val_umi){
                continue;
            }
        }
        if(b->core.qual < min_map_qual || (b->core.flag & exc_flag) || (b->core.flag & inc_flag) != inc_flag) continue;
        //Same orientation rule as the multi-position path, so both modes count the same reads
        if(inc_flag & BAM_FPROPER_PAIR){
            if ((!(b->core.flag & BAM_FMREVERSE) == !(b->core.flag & BAM_FREVERSE))) continue;
        }
        bam_plp_push(buf, b);
    }
    check(result>=-1, "Error detected (%d) when trying to iterate through region.",result);
    sam_itr_destroy(iter);
    iter = NULL;

    bam_plp_push(buf, 0); // finalize pileup
    while ( (pil=bam_plp_next(buf, &tid, &pos, &n_plp)) != NULL) {
        //Pileup positions are 0-based; only the requested column is of interest
        if((pos+1) != posn) continue;
        if(is_10x){
            pileupCounts10x(pil, n_plp, fholder->stats,output);
        }else{
            pileupCounts(pil, n_plp, fholder->stats);
        }
    } //End of iteration through pileup
    bam_plp_destroy(buf);
    free(region);
    bam_destroy1(b);
    return 0;

error:
    if(iter) sam_itr_destroy(iter);
    if(buf) bam_plp_destroy(buf);
    if(b) bam_destroy1(b);
    if(region) free(region);
    return 1;
}
//Set the minimum base quality a pileup base must reach to be counted.
void bam_access_min_base_qual(int qual){
    min_base_qual = qual;
}
//Set the minimum mapping quality; reads below it are skipped.
void bam_access_min_map_qual(int qual){
    min_map_qual = qual;
}
//Set the flag bits a read must ALL carry to be used.
void bam_access_inc_flag(int inc){
    inc_flag = inc;
}
//Set the flag bits that exclude a read when ANY of them is present.
void bam_access_exc_flag(int exc){
    exc_flag = exc;
}
alleleCount-4.3.0/c/src/bam_access.h 0000664 0000000 0000000 00000003231 14073306636 0017236 0 ustar 00root root 0000000 0000000 /** LICENSE
* Copyright (c) 2014-2020 Genome Research Ltd.
*
* Author: CASM/Cancer IT
*
* This file is part of alleleCount.
*
* alleleCount is free software: you can redistribute it and/or modify it under
* the terms of the GNU Affero General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
* details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _bam_access_h
#define _bam_access_h
#include
#include
#include
#include
/* One locus to be counted, together with its result. */
typedef struct loci_stats{
/* Per-base tallies; the counting code uses index 0=A, 1=C, 2=G, 3=T */
int *base_counts;
/* Contig name, used to build the region string */
char *chr;
/* Position on the contig; compared against pileup position + 1, i.e. 1-based */
int pos;
/* Allele characters passed through to the output printing routine together with its snp6 argument */
char allele_A;
char allele_B;
} loci_stats;
/* Everything needed to query the open alignment file. */
typedef struct file_holder{
/* Set to 0 / 0x7fffffff when the file is opened and to the queried position by the single-position lookup */
int beg, end;
/* Open alignment file handle */
htsFile *in;
/* Index loaded for `in` */
hts_idx_t *idx;
/* Locus currently being counted by the single-position lookup; not owned by this struct */
loci_stats *stats;
/* Header read from `in`; needed to resolve region strings */
bam_hdr_t *head;
} file_holder;
/* Setters for the read/base filters applied while counting. */
void bam_access_min_base_qual(int qual);
void bam_access_min_map_qual(int qual);
void bam_access_inc_flag(int inc);
void bam_access_exc_flag(int exc);
/* Open hts_file (plus index and header); ref_file may be NULL. Returns 0 on success, -1 on failure. */
int bam_access_openhts(char *hts_file, char *ref_file);
/* Count a single position chr:pos into stats (10x mode writes rows to output). Returns 0 on success, 1 on failure. */
int bam_access_get_position_base_counts(char *chr, int pos, loci_stats *stats,int is_10x,FILE *output);
/* Count all stats_count loci, one pileup per contig. Returns 0 on success, 1 on failure. */
int bam_access_get_multi_position_base_counts(loci_stats **stats, int stats_count,int is_10x,FILE *output);
/* Release everything opened by bam_access_openhts. */
void bam_access_closehts();
/* qsort comparator over bam_pileup1_t entries: by "CB" tag, then by "UB" tag. */
int readCompare(const void *r1,const void *r2);
#endif
alleleCount-4.3.0/c/src/dbg.h 0000664 0000000 0000000 00000005111 14073306636 0015711 0 ustar 00root root 0000000 0000000 /**
* Copyright (c) 2010, Zed A. Shaw. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of the Learn C The Hard Way, Zed A. Shaw, nor the names
* of its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __dbg_h__
#define __dbg_h__
#include
#include
#include
#ifdef NDEBUG
#define debug(M, ...)
#else
#define debug(M, ...) fprintf(stderr, "DEBUG %s: %s:%d " M "\n", __FILE__, __func__, __LINE__, ##__VA_ARGS__)
#endif
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
#define log_err(M, ...) fprintf(stderr, "[ERROR] (%s: %s:%d errno: %s) " M "\n", __FILE__, __func__, __LINE__, clean_errno(), ##__VA_ARGS__)
#define log_warn(M, ...) fprintf(stderr, "[WARN] (%s: %s:%d errno: %s) " M "\n", __FILE__, __func__, __LINE__, clean_errno(), ##__VA_ARGS__)
#define log_info(M, ...) fprintf(stderr, "[INFO] (%s: %s:%d) " M "\n", __FILE__, __func__, __LINE__, ##__VA_ARGS__)
#define check(A, M, ...) if(!(A)) { log_err(M, ##__VA_ARGS__); errno=0; goto error; }
#define sentinel(M, ...) { log_err(M, ##__VA_ARGS__); errno=0; goto error; }
#define check_mem(A) check((A), "Out of memory.")
#define check_debug(A, M, ...) if(!(A)) { debug(M, ##__VA_ARGS__); errno=0; goto error; }
#endif
alleleCount-4.3.0/c/src/khash.h 0000664 0000000 0000000 00000051441 14073306636 0016262 0 ustar 00root root 0000000 0000000 /* The MIT License
Copyright (c) 2008, 2009, 2011 by Attractive Chaos
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
/*
An example:
#include "khash.h"
KHASH_MAP_INIT_INT(32, char)
int main() {
int ret, is_missing;
khiter_t k;
khash_t(32) *h = kh_init(32);
k = kh_put(32, h, 5, &ret);
kh_value(h, k) = 10;
k = kh_get(32, h, 10);
is_missing = (k == kh_end(h));
k = kh_get(32, h, 5);
kh_del(32, h, k);
for (k = kh_begin(h); k != kh_end(h); ++k)
if (kh_exist(h, k)) kh_value(h, k) = 1;
kh_destroy(32, h);
return 0;
}
*/
/*
2013-05-02 (0.2.8):
* Use quadratic probing. When the capacity is power of 2, stepping function
i*(i+1)/2 guarantees to traverse each bucket. It is better than double
hashing on cache performance and is more robust than linear probing.
In theory, double hashing should be more robust than quadratic probing.
However, my implementation is probably not for large hash tables, because
the second hash function is closely tied to the first hash function,
which reduce the effectiveness of double hashing.
Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php
2011-12-29 (0.2.7):
* Minor code clean up; no actual effect.
2011-09-16 (0.2.6):
* The capacity is a power of 2. This seems to dramatically improve the
speed for simple keys. Thank Zilong Tan for the suggestion. Reference:
- http://code.google.com/p/ulib/
- http://nothings.org/computer/judy/
* Allow to optionally use linear probing which usually has better
performance for random input. Double hashing is still the default as it
is more robust to certain non-random input.
* Added Wang's integer hash function (not used by default). This hash
function is more robust to certain non-random input.
2011-02-14 (0.2.5):
* Allow to declare global functions.
2009-09-26 (0.2.4):
* Improve portability
2008-09-19 (0.2.3):
* Corrected the example
* Improved interfaces
2008-09-11 (0.2.2):
* Improved speed a little in kh_put()
2008-09-10 (0.2.1):
* Added kh_clear()
* Fixed a compiling error
2008-09-02 (0.2.0):
* Changed to token concatenation which increases flexibility.
2008-08-31 (0.1.2):
* Fixed a bug in kh_get(), which has not been tested previously.
2008-08-31 (0.1.1):
* Added destructor
*/
#ifndef __AC_KHASH_H
#define __AC_KHASH_H
/*!
@header
Generic hash table library.
*/
#define AC_VERSION_KHASH_H "0.2.8"
#include
#include
#include
/* compiler specific configuration */
/* khint32_t: pick whichever of unsigned int / unsigned long has a 32-bit maximum. */
#if UINT_MAX == 0xffffffffu
typedef unsigned int khint32_t;
#elif ULONG_MAX == 0xffffffffu
typedef unsigned long khint32_t;
#endif
/* khint64_t: unsigned long when it is as wide as unsigned long long, otherwise unsigned long long. */
#if ULONG_MAX == ULLONG_MAX
typedef unsigned long khint64_t;
#else
typedef unsigned long long khint64_t;
#endif
/* kh_inline: the compiler's inline keyword; may be pre-defined by the includer. */
#ifndef kh_inline
#ifdef _MSC_VER
#define kh_inline __inline
#else
#define kh_inline inline
#endif
#endif /* kh_inline */
/* khint_t is the bucket index / size type; khiter_t is the same type used as an iterator. */
typedef khint32_t khint_t;
typedef khint_t khiter_t;
/* Bucket state is packed two bits per bucket, sixteen buckets per 32-bit flag word:
   flag[i>>4] is the word, (i&0xf)<<1 the bit offset. Bit value 2 means "empty",
   bit value 1 means "deleted"; a live bucket has both bits clear. */
#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
/* Number of flag words needed for m buckets (at least one). */
#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
/* kroundup32(x): round x up, in place, to the next power of two (x is modified; 32-bit smear-and-increment). */
#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
/* Allocator hooks: define any of these before including this header to substitute a custom allocator. */
#ifndef kcalloc
#define kcalloc(N,Z) calloc(N,Z)
#endif
#ifndef kmalloc
#define kmalloc(Z) malloc(Z)
#endif
#ifndef krealloc
#define krealloc(P,Z) realloc(P,Z)
#endif
#ifndef kfree
#define kfree(P) free(P)
#endif
/* Load factor: a table's upper_bound is n_buckets * this value; kh_put resizes once n_occupied reaches it. */
static const double __ac_HASH_UPPER = 0.77;
/* Declares the table struct kh_<name>_t: bucket count, live element count (size),
   used-or-deleted bucket count (n_occupied), resize threshold (upper_bound),
   the packed 2-bit state flags, and the parallel key / value arrays. */
#define __KHASH_TYPE(name, khkey_t, khval_t) \
typedef struct kh_##name##_s { \
khint_t n_buckets, size, n_occupied, upper_bound; \
khint32_t *flags; \
khkey_t *keys; \
khval_t *vals; \
} kh_##name##_t;
/* Declares (as extern) the seven functions that __KHASH_IMPL defines for table type <name>. */
#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \
extern kh_##name##_t *kh_init_##name(void); \
extern void kh_destroy_##name(kh_##name##_t *h); \
extern void kh_clear_##name(kh_##name##_t *h); \
extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
extern void kh_del_##name(kh_##name##_t *h, khint_t x);
#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
SCOPE kh_##name##_t *kh_init_##name(void) { \
return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \
} \
SCOPE void kh_destroy_##name(kh_##name##_t *h) \
{ \
if (h) { \
kfree((void *)h->keys); kfree(h->flags); \
kfree((void *)h->vals); \
kfree(h); \
} \
} \
SCOPE void kh_clear_##name(kh_##name##_t *h) \
{ \
if (h && h->flags) { \
memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \
h->size = h->n_occupied = 0; \
} \
} \
SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
{ \
if (h->n_buckets) { \
khint_t k, i, last, mask, step = 0; \
mask = h->n_buckets - 1; \
k = __hash_func(key); i = k & mask; \
last = i; \
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
i = (i + (++step)) & mask; \
if (i == last) return h->n_buckets; \
} \
return __ac_iseither(h->flags, i)? h->n_buckets : i; \
} else return 0; \
} \
SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
{ /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
khint32_t *new_flags = 0; \
khint_t j = 1; \
{ \
kroundup32(new_n_buckets); \
if (new_n_buckets < 4) new_n_buckets = 4; \
if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \
else { /* hash table size to be changed (shrink or expand); rehash */ \
new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
if (!new_flags) return -1; \
memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
if (h->n_buckets < new_n_buckets) { /* expand */ \
khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
if (!new_keys) { kfree(new_flags); return -1; } \
h->keys = new_keys; \
if (kh_is_map) { \
khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
if (!new_vals) { kfree(new_flags); return -1; } \
h->vals = new_vals; \
} \
} /* otherwise shrink */ \
} \
} \
if (j) { /* rehashing is needed */ \
for (j = 0; j != h->n_buckets; ++j) { \
if (__ac_iseither(h->flags, j) == 0) { \
khkey_t key = h->keys[j]; \
khval_t val; \
khint_t new_mask; \
new_mask = new_n_buckets - 1; \
if (kh_is_map) val = h->vals[j]; \
__ac_set_isdel_true(h->flags, j); \
while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
khint_t k, i, step = 0; \
k = __hash_func(key); \
i = k & new_mask; \
while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \
__ac_set_isempty_false(new_flags, i); \
if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
__ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \
} else { /* write the element and jump out of the loop */ \
h->keys[i] = key; \
if (kh_is_map) h->vals[i] = val; \
break; \
} \
} \
} \
} \
if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
} \
kfree(h->flags); /* free the working space */ \
h->flags = new_flags; \
h->n_buckets = new_n_buckets; \
h->n_occupied = h->size; \
h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
} \
return 0; \
} \
SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
{ \
khint_t x; \
if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
if (h->n_buckets > (h->size<<1)) { \
if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \
*ret = -1; return h->n_buckets; \
} \
} else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \
*ret = -1; return h->n_buckets; \
} \
} /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
{ \
khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \
x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
else { \
last = i; \
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
if (__ac_isdel(h->flags, i)) site = i; \
i = (i + (++step)) & mask; \
if (i == last) { x = site; break; } \
} \
if (x == h->n_buckets) { \
if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
else x = i; \
} \
} \
} \
if (__ac_isempty(h->flags, x)) { /* not present at all */ \
h->keys[x] = key; \
__ac_set_isboth_false(h->flags, x); \
++h->size; ++h->n_occupied; \
*ret = 1; \
} else if (__ac_isdel(h->flags, x)) { /* deleted */ \
h->keys[x] = key; \
__ac_set_isboth_false(h->flags, x); \
++h->size; \
*ret = 2; \
} else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
return x; \
} \
SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
{ \
if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
__ac_set_isdel_true(h->flags, x); \
--h->size; \
} \
}
/* KHASH_DECLARE: struct type plus extern prototypes only (for headers). */
#define KHASH_DECLARE(name, khkey_t, khval_t) \
__KHASH_TYPE(name, khkey_t, khval_t) \
__KHASH_PROTOTYPES(name, khkey_t, khval_t)
/* KHASH_INIT2: struct type plus function definitions with a caller-chosen storage class (SCOPE). */
#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
__KHASH_TYPE(name, khkey_t, khval_t) \
__KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
/* KHASH_INIT: KHASH_INIT2 with `static kh_inline` functions, i.e. private to the including translation unit. */
#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
KHASH_INIT2(name, static kh_inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
/* --- BEGIN OF HASH FUNCTIONS --- */
/*! @function
@abstract Integer hash function
@param key The integer [khint32_t]
@return The hash value [khint_t]
*/
#define kh_int_hash_func(key) (khint32_t)(key)
/*! @function
@abstract Integer comparison function
*/
#define kh_int_hash_equal(a, b) ((a) == (b))
/*! @function
@abstract 64-bit integer hash function
@param key The integer [khint64_t]
@return The hash value [khint_t]
*/
#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
/*! @function
@abstract 64-bit integer comparison function
*/
#define kh_int64_hash_equal(a, b) ((a) == (b))
/*! @function
@abstract const char* hash function
@param s Pointer to a null terminated string
@return The hash value
*/
static kh_inline khint_t __ac_X31_hash_string(const char *s)
{
    /* Seed with the first character; the empty string hashes to 0. */
    khint_t hash = (khint_t)*s;
    const char *cursor;
    if (hash == 0) return hash;
    /* Fold in each following character: hash*31 + c, written as shift-and-subtract. */
    cursor = s + 1;
    while (*cursor) {
        hash = (hash << 5) - hash + (khint_t)*cursor;
        ++cursor;
    }
    return hash;
}
/*! @function
@abstract Another interface to const char* hash function
@param key Pointer to a null terminated string [const char*]
@return The hash value [khint_t]
*/
#define kh_str_hash_func(key) __ac_X31_hash_string(key)
/*! @function
@abstract Const char* comparison function
*/
#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
/* Integer mixing hash: six add/xor rounds over shifted copies of the key. */
static kh_inline khint_t __ac_Wang_hash(khint_t key)
{
    khint_t mixed = key;
    mixed = mixed + ~(mixed << 15);
    mixed = mixed ^ (mixed >> 10);
    mixed = mixed + (mixed << 3);
    mixed = mixed ^ (mixed >> 6);
    mixed = mixed + ~(mixed << 11);
    mixed = mixed ^ (mixed >> 16);
    return mixed;
}
/* Wang-hash alternative to kh_int_hash_func. The parameter is named `key` so
   that the expansion uses the macro argument rather than whatever identifier
   called `key` happens to be in scope at the point of use. */
#define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)(key))
/* --- END OF HASH FUNCTIONS --- */
/* Other convenient macros... */
/*!
@abstract Type of the hash table.
@param name Name of the hash table [symbol]
*/
#define khash_t(name) kh_##name##_t
/*! @function
@abstract Initiate a hash table.
@param name Name of the hash table [symbol]
@return Pointer to the hash table [khash_t(name)*]
*/
#define kh_init(name) kh_init_##name()
/*! @function
@abstract Destroy a hash table.
@param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*]
*/
#define kh_destroy(name, h) kh_destroy_##name(h)
/*! @function
@abstract Reset a hash table without deallocating memory.
@param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*]
*/
#define kh_clear(name, h) kh_clear_##name(h)
/*! @function
@abstract Resize a hash table.
@param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*]
@param s New size [khint_t]
*/
#define kh_resize(name, h, s) kh_resize_##name(h, s)
/*! @function
@abstract Insert a key to the hash table.
@param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*]
@param k Key [type of keys]
@param r Extra return code: -1 if the operation failed;
0 if the key is present in the hash table;
1 if the bucket is empty (never used); 2 if the element in
the bucket has been deleted [int*]
@return Iterator to the inserted element [khint_t]
*/
#define kh_put(name, h, k, r) kh_put_##name(h, k, r)
/*! @function
@abstract Retrieve a key from the hash table.
@param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*]
@param k Key [type of keys]
@return Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
*/
#define kh_get(name, h, k) kh_get_##name(h, k)
/*! @function
@abstract Remove a key from the hash table.
@param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*]
@param k Iterator to the element to be deleted [khint_t]
*/
#define kh_del(name, h, k) kh_del_##name(h, k)
/*! @function
@abstract Test whether a bucket contains data.
@param h Pointer to the hash table [khash_t(name)*]
@param x Iterator to the bucket [khint_t]
@return 1 if containing data; 0 otherwise [int]
*/
#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
/*! @function
@abstract Get key given an iterator
@param h Pointer to the hash table [khash_t(name)*]
@param x Iterator to the bucket [khint_t]
@return Key [type of keys]
*/
#define kh_key(h, x) ((h)->keys[x])
/*! @function
@abstract Get value given an iterator
@param h Pointer to the hash table [khash_t(name)*]
@param x Iterator to the bucket [khint_t]
@return Value [type of values]
@discussion For hash sets, calling this results in segfault.
*/
#define kh_val(h, x) ((h)->vals[x])
/*! @function
@abstract Alias of kh_val()
*/
#define kh_value(h, x) ((h)->vals[x])
/*! @function
@abstract Get the start iterator
@param h Pointer to the hash table [khash_t(name)*]
@return The start iterator [khint_t]
*/
#define kh_begin(h) (khint_t)(0)
/*! @function
@abstract Get the end iterator
@param h Pointer to the hash table [khash_t(name)*]
@return The end iterator [khint_t]
*/
#define kh_end(h) ((h)->n_buckets)
/*! @function
@abstract Get the number of elements in the hash table
@param h Pointer to the hash table [khash_t(name)*]
@return Number of elements in the hash table [khint_t]
*/
#define kh_size(h) ((h)->size)
/*! @function
@abstract Get the number of buckets in the hash table
@param h Pointer to the hash table [khash_t(name)*]
@return Number of buckets in the hash table [khint_t]
*/
#define kh_n_buckets(h) ((h)->n_buckets)
/*! @function
@abstract Iterate over the entries in the hash table
@param h Pointer to the hash table [khash_t(name)*]
@param kvar Variable to which key will be assigned
@param vvar Variable to which value will be assigned
@param code Block of code to execute
@discussion Walks every bucket from kh_begin(h) to kh_end(h) and skips
buckets for which kh_exist() is false. `code` is pasted into the body of
that for loop, so a `break` or `continue` inside it acts on this loop.
The macro declares its own index `__i` inside a braced scope; `h` is
evaluated several times per iteration, so pass a side-effect-free expression.
*/
#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \
for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
if (!kh_exist(h,__i)) continue; \
(kvar) = kh_key(h,__i); \
(vvar) = kh_val(h,__i); \
code; \
} }
/*! @function
@abstract Iterate over the values in the hash table
@param h Pointer to the hash table [khash_t(name)*]
@param vvar Variable to which value will be assigned
@param code Block of code to execute
@discussion Same traversal as kh_foreach but only the value is assigned.
`code` is pasted into the body of the for loop, so a `break` or `continue`
inside it acts on this loop. `h` is evaluated several times per iteration.
*/
#define kh_foreach_value(h, vvar, code) { khint_t __i; \
for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
if (!kh_exist(h,__i)) continue; \
(vvar) = kh_val(h,__i); \
code; \
} }
/* More convenient interfaces */
/*! @function
@abstract Instantiate a hash set containing integer keys
@param name Name of the hash table [symbol]
*/
#define KHASH_SET_INIT_INT(name) \
KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
/*! @function
@abstract Instantiate a hash map containing integer keys
@param name Name of the hash table [symbol]
@param khval_t Type of values [type]
*/
#define KHASH_MAP_INIT_INT(name, khval_t) \
KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
/*! @function
@abstract Instantiate a hash set containing 64-bit integer keys
@param name Name of the hash table [symbol]
@discussion Passes 0 as the is-map argument of KHASH_INIT; the `char`
value type is only a placeholder.
*/
#define KHASH_SET_INIT_INT64(name) \
KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
/*! @function
@abstract Instantiate a hash map containing 64-bit integer keys
@param name Name of the hash table [symbol]
@param khval_t Type of values [type]
*/
#define KHASH_MAP_INIT_INT64(name, khval_t) \
KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
typedef const char *kh_cstr_t;
/*! @function
@abstract Instantiate a hash set containing const char* keys
@param name Name of the hash table [symbol]
@discussion Passes 0 as the is-map argument of KHASH_INIT; the `char`
value type is only a placeholder. Keys are compared with strcmp() via
kh_str_hash_equal.
*/
#define KHASH_SET_INIT_STR(name) \
KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
/*! @function
@abstract Instantiate a hash map containing const char* keys
@param name Name of the hash table [symbol]
@param khval_t Type of values [type]
*/
#define KHASH_MAP_INIT_STR(name, khval_t) \
KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
#endif /* __AC_KHASH_H */
alleleCount-4.3.0/c/tests/ 0000775 0000000 0000000 00000000000 14073306636 0015361 5 ustar 00root root 0000000 0000000 alleleCount-4.3.0/c/tests/bam_access_tests.c 0000664 0000000 0000000 00000010425 14073306636 0021031 0 ustar 00root root 0000000 0000000 /** LICENSE
* Copyright (c) 2014-2020 Genome Research Ltd.
*
* Author: CASM/Cancer IT
*
* This file is part of alleleCount.
*
* alleleCount is free software: you can redistribute it and/or modify it under
* the terms of the GNU Affero General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
* details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "minunit.h"
#include
char *test_bam = "../testData/test.bam";
char *test_ref = "../testData/ref.fa.fai";
char *test_bam10X = "../testData/test10X.bam";
char *test_10X_cnts = "../testData/test10X_true.cnts";
char *test_bam_access_get_position_base_counts(){
//Check with default settings
char *chr = "22";
int pos = 16165776;
loci_stats *stats = malloc(sizeof(loci_stats));
stats->pos = pos;
stats->chr = chr;
stats->base_counts = malloc(sizeof(int) * 4);
stats->base_counts[0] = 0;
stats->base_counts[1] = 0;
stats->base_counts[2] = 0;
stats->base_counts[3] = 0;
int chk = -1;
chk = bam_access_openhts(test_bam,test_ref);
check(chk == 0,"Error trying to open bam file '%s'.",test_bam);
int res = bam_access_get_position_base_counts(chr,pos,stats,0,NULL);
mu_assert(stats->base_counts[0]==2,"Check A count 1");
mu_assert(stats->base_counts[1]==17,"Check C count 1");
mu_assert(stats->base_counts[2]==0,"Check G count 1");
mu_assert(stats->base_counts[3]==0,"Check T count 1");
free(stats->base_counts);
stats->base_counts = malloc(sizeof(int) * 4);
stats->base_counts[0] = 0;
stats->base_counts[1] = 0;
stats->base_counts[2] = 0;
stats->base_counts[3] = 0;
int min_bq = 15;
bam_access_min_base_qual(min_bq);
res = bam_access_get_position_base_counts(chr,pos,stats,0,NULL);
mu_assert(stats->base_counts[0]==2,"Check A count 2");
mu_assert(stats->base_counts[1]==18,"Check C count 2");
mu_assert(stats->base_counts[2]==0,"Check G count 2");
mu_assert(stats->base_counts[3]==0,"Check T count 2");
free(stats->base_counts);
stats->base_counts = malloc(sizeof(int) * 4);
stats->base_counts[0] = 0;
stats->base_counts[1] = 0;
stats->base_counts[2] = 0;
stats->base_counts[3] = 0;
int min_mq = 15;
bam_access_min_map_qual(min_mq);
res = bam_access_get_position_base_counts(chr,pos,stats,0,NULL);
mu_assert(stats->base_counts[0]==2,"Check A count 3");
mu_assert(stats->base_counts[1]==24,"Check C count 3");
mu_assert(stats->base_counts[2]==0,"Check G count 3");
mu_assert(stats->base_counts[3]==0,"Check T count 3");
free(stats->base_counts);
bam_access_closehts();
//Check 10X mode
//Open the 10X BAM
bam_access_min_base_qual(20);
bam_access_min_map_qual(200);
bam_access_inc_flag(0);
bam_access_exc_flag(0);
chk = bam_access_openhts(test_bam10X,test_ref);
check(chk == 0,"Error trying to open bam file '%s'.",test_bam10X);
FILE *output = fopen("../testData/test10X.cnts","w");
int loci_count=1;
chr = "1";
pos = 198661939;
stats->pos = pos;
stats->chr = chr;
stats->base_counts = malloc(sizeof(int) * 4);
stats->base_counts[0] = 0;
stats->base_counts[1] = 0;
stats->base_counts[2] = 0;
stats->base_counts[3] = 0;
res = bam_access_get_multi_position_base_counts(&stats,loci_count,1,output);
fclose(output);
free(stats->base_counts);
//Now load the output and check it's correct
FILE *gold = fopen(test_10X_cnts,"r");
FILE *cnts10X = fopen("../testData/test10X.cnts","r");
//Check we can get the files
check(cnts10X != NULL && gold !=NULL,"Error trying to open 10X output file.");
//Now compare them for being identical
int ch1,ch2;
do{
ch1 = getc(gold);
ch2 = getc(cnts10X);
}while((ch1 != EOF) && (ch2 != EOF) && (ch1 == ch2));
mu_assert(ch1==ch2,"Check 10X output");
fclose(cnts10X);
fclose(gold);
bam_access_closehts();
free(stats);
return NULL;
error:
return "1";
}
/* Suite entry point: declares the suite's `message` variable via
 * mu_suite_start(), runs each test through mu_run_test (which returns the
 * first non-NULL failure message), and returns NULL when every test passed. */
char *all_tests() {
mu_suite_start();
mu_run_test(test_bam_access_get_position_base_counts);
return NULL;
}
/* Expands to main(), which calls all_tests() and reports the outcome. */
RUN_TESTS(all_tests);
alleleCount-4.3.0/c/tests/minunit.h 0000664 0000000 0000000 00000003127 14073306636 0017220 0 ustar 00root root 0000000 0000000 /** LICENSE
* Copyright (c) 2014-2020 Genome Research Ltd.
*
* Author: CASM/Cancer IT
*
* This file is part of alleleCount.
*
* alleleCount is free software: you can redistribute it and/or modify it under
* the terms of the GNU Affero General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
* details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#undef NDEBUG
#ifndef _minunit_h
#define _minunit_h
#include
#include
#include
/* Declares the `message` variable that mu_run_test assigns to. Use once, among
 * the declarations at the top of the suite function. */
#define mu_suite_start() char *message = NULL
/* Fails the current test when `test` is false: logs `message` with log_err and
 * returns it from the enclosing function. Wrapped in do/while(0) so the macro
 * behaves as a single statement (safe before an `else`, safe in an unbraced
 * `if`). Call sites must end with a semicolon. */
#define mu_assert(test, message) do { \
    if (!(test)) { log_err(message); return message; } \
} while (0)
/* Runs one test function: announces it with debug(), stores its result in
 * `message`, increments tests_run, and returns the message from the enclosing
 * function when the test reported a failure. Wrapped in do/while(0) so all of
 * its statements stay together when used under an unbraced `if`. */
#define mu_run_test(test) do { \
    debug("\n-----%s", " " #test); \
    message = test(); tests_run++; \
    if (message) return message; \
} while (0)
/* Defines main() for a test executable. `name` is the suite function; it must
 * return NULL on success or a failure message. main() prints the program name,
 * calls the suite, prints either "FAILED: <message>" or "ALL TESTS PASSED",
 * prints the number of tests run, and exits with status 1 on failure and 0
 * otherwise. `argc = 1;` only assigns to the parameter, presumably to silence
 * an unused-parameter warning. */
#define RUN_TESTS(name) int main(int argc, char *argv[]) {\
argc = 1; \
debug("----- RUNNING: %s", argv[0]);\
printf("----\nRUNNING: %s\n", argv[0]);\
char *result = name();\
if (result != 0) {\
printf("FAILED: %s\n", result);\
}\
else {\
printf("ALL TESTS PASSED\n");\
}\
printf("Tests run: %d\n", tests_run);\
exit(result != 0);\
}
/* Count of tests executed; incremented by mu_run_test and printed by main(). */
int tests_run;
#endif
alleleCount-4.3.0/c/tests/runtests.sh 0000664 0000000 0000000 00000002136 14073306636 0017606 0 ustar 00root root 0000000 0000000 ########## LICENSE ##########
# Copyright (c) 2014-2020 Genome Research Ltd.
#
# Author: CASM/Cancer IT
#
# This file is part of alleleCount.
#
# alleleCount is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
###########################
# Runs every test binary matching tests/*_tests, appending each binary's stderr
# to tests/tests_log. Stops at the first failing binary, shows the tail of the
# log and exits with status 1.
echo "Running unit tests:"
for i in tests/*_tests
do
  # When nothing matches, the pattern is left as a literal string; skip it.
  if test -f "$i"
  then
    # $VALGRIND is deliberately unquoted: it may be empty, and presumably may
    # hold a command followed by its options.
    if $VALGRIND "./$i" 2>> tests/tests_log
    then
      echo "$i PASS"
    else
      echo "ERROR in test $i: here's tests/tests_log"
      echo "------"
      tail tests/tests_log
      exit 1
    fi
  fi
done
echo ""
alleleCount-4.3.0/example/ 0000775 0000000 0000000 00000000000 14073306636 0015430 5 ustar 00root root 0000000 0000000 alleleCount-4.3.0/example/gender.loci 0000664 0000000 0000000 00000000050 14073306636 0017537 0 ustar 00root root 0000000 0000000 Y 4546684
Y 2934912
Y 4550107
Y 4549638
alleleCount-4.3.0/example/gender_chr.loci 0000664 0000000 0000000 00000000064 14073306636 0020400 0 ustar 00root root 0000000 0000000 chrY 4546684
chrY 2934912
chrY 4550107
chrY 4549638
alleleCount-4.3.0/perl/ 0000775 0000000 0000000 00000000000 14073306636 0014737 5 ustar 00root root 0000000 0000000 alleleCount-4.3.0/perl/MANIFEST 0000664 0000000 0000000 00000000315 14073306636 0016067 0 ustar 00root root 0000000 0000000 bin/alleleCounter.pl
docs.tar.gz
lib/Sanger/CGP/AlleleCount.pm
lib/Sanger/CGP/AlleleCount/Genotype.pm
Makefile.PL
MANIFEST This list of files
MANIFEST.SKIP
t/1_pm_compile.t
t/2_pl_compile.t
t/genotype.t
alleleCount-4.3.0/perl/MANIFEST.SKIP 0000664 0000000 0000000 00000002003 14073306636 0016630 0 ustar 00root root 0000000 0000000
#!start included /software/perl-5.16.3/lib/5.16.3/ExtUtils/MANIFEST.SKIP
# Avoid version control files.
\bRCS\b
\bCVS\b
\bSCCS\b
,v$
\B\.svn\b
\B\.git\b
\B\.gitignore\b
\b_darcs\b
\B\.cvsignore$
# Avoid VMS specific MakeMaker generated files
\bDescrip.MMS$
\bDESCRIP.MMS$
\bdescrip.mms$
# Avoid Makemaker generated and utility files.
\bMANIFEST\.bak
\bMakefile$
\bblib/
\bMakeMaker-\d
\bpm_to_blib\.ts$
\bpm_to_blib$
\bblibdirs\.ts$ # 6.18 through 6.25 generated this
# Avoid Module::Build generated and utility files.
\bBuild$
\b_build/
\bBuild.bat$
\bBuild.COM$
\bBUILD.COM$
\bbuild.com$
# Avoid temp and backup files.
~$
\.old$
\#$
\b\.#
\.bak$
\.tmp$
\.#
\.rej$
# Avoid OS-specific files/dirs
# Mac OSX metadata
\B\.DS_Store
# Mac OSX SMB mount metadata files
\B\._
# Avoid Devel::Cover and Devel::CoverX::Covered files.
\bcover_db\b
\bcovered\b
# Avoid MYMETA files
^MYMETA\.
#!end included /software/perl-5.16.3/lib/5.16.3/ExtUtils/MANIFEST.SKIP
# specific things from this project
^docs/
^perltidy.LOG
alleleCount-4.3.0/perl/Makefile.PL 0000664 0000000 0000000 00000003135 14073306636 0016713 0 ustar 00root root 0000000 0000000 #!/usr/bin/perl
##########LICENCE##########
# Copyright (c) 2014-2020 Genome Research Ltd.
#
# Author: CASM/Cancer IT
#
# This file is part of alleleCount.
#
# alleleCount is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
##########LICENCE##########
use strict;
use warnings;
use ExtUtils::MakeMaker;

# Build configuration for the Perl side of alleleCount. The distribution
# version is read from lib/Sanger/CGP/AlleleCount.pm and both command line
# scripts are installed as executables.
# Minimum versions are quoted so they are recorded exactly as written; as a
# bare number, 2.90 would be stored as 2.9.
WriteMakefile(
  NAME          => 'alleleCount',
  LICENSE       => 'agpl_3', # http://search.cpan.org/~dagolden/CPAN-Meta-2.142690/lib/CPAN/Meta/Spec.pm#license
  VERSION_FROM  => 'lib/Sanger/CGP/AlleleCount.pm',
  EXE_FILES     => [qw( bin/alleleCounter.pl bin/alleleCounterToJson.pl )],
  PREREQ_PM     => {
    'Const::Fast'         => '0.014',
    'Try::Tiny'           => '0.19',
    'File::Slurp'         => '9999.19',
    'File::Which'         => '0.05',
    'Test::Fatal'         => '0.013',
    'Devel::Cover'        => '1.09',
    'Pod::Coverage'       => '0.23',
    'IPC::System::Simple' => '1.25',
    'JSON'                => '2.90',
  }
);
alleleCount-4.3.0/perl/bin/ 0000775 0000000 0000000 00000000000 14073306636 0015507 5 ustar 00root root 0000000 0000000 alleleCount-4.3.0/perl/bin/alleleCounter.pl 0000775 0000000 0000000 00000007235 14073306636 0020654 0 ustar 00root root 0000000 0000000 #!/usr/bin/perl
##########LICENCE##########
# Copyright (c) 2014-2020 Genome Research Ltd.
#
# Author: CASM/Cancer IT
#
# This file is part of alleleCount.
#
# alleleCount is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
##########LICENCE##########
use FindBin qw($Bin);
use lib "$Bin/../lib";
use strict;
use Carp;
use English qw( -no_match_vars );
use warnings FATAL => 'all';
use Getopt::Long 'GetOptions';
use Pod::Usage;
use Const::Fast qw(const);
use Sanger::CGP::AlleleCount::Genotype;
const my $MIN_MAPQ => 35;
const my $MIN_PBQ => 30; # needs to correlate with Illumina bins
# Script entry point: parse the command line, fall back to /dev/stdout when no
# output was requested, then hand the options to run().
{
  my $options = option_builder();
  if(!defined $options->{'o'}) {
    $options->{'o'} = '/dev/stdout';
  }
  run($options);
}
# Picks the Genotype method matching the requested mode and calls it with the
# shared positional arguments (options b, o, l, m, q, r in that order):
#   -g  => gender_chk
#   -s  => get_full_snp6_profile
#   otherwise get_full_loci_profile
# -g takes precedence over -s, as it is tested first.
sub run {
  my ($options) = @_;
  my $geno_ob = Sanger::CGP::AlleleCount::Genotype->new();
  my $method = $options->{'g'} ? 'gender_chk'
             : $options->{'s'} ? 'get_full_snp6_profile'
             :                   'get_full_loci_profile';
  $geno_ob->$method(@{$options}{qw(b o l m q r)});
}
# Parses @ARGV and returns a hash reference keyed by the single-letter option
# names (h, b, o, l, g, s, r, m, q, v).
#  - Unparseable command lines print the usage and exit with status 2.
#  - -h prints the usage and exits 0; -v prints the module version and exits.
#  - -b and -l are mandatory, and -g / -s are mutually exclusive; violating
#    either prints the usage and exits with status 1.
#  - -m and -q default to $MIN_PBQ and $MIN_MAPQ when not supplied.
# 'loci' is accepted as an alias of 'locus' because the usage text documents
# the option as -loci.
sub option_builder {
  my %opts;
  GetOptions(
    'h|help' => \$opts{'h'},
    'b|bam=s' => \$opts{'b'},
    'o|output=s' => \$opts{'o'},
    'l|locus|loci=s' => \$opts{'l'},
    'g|gender' => \$opts{'g'},
    's|snp6' => \$opts{'s'},
    'r|ref=s' => \$opts{'r'},
    'm|minqual=n' => \$opts{'m'},
    'q|mapqual=n' => \$opts{'q'},
    'v|version' => \$opts{'v'},
  ) or pod2usage(2);

  pod2usage(0) if($opts{'h'});
  if($opts{'v'}){
    print Sanger::CGP::AlleleCount->VERSION."\n";
    exit;
  }
  pod2usage(1) if(!$opts{'b'} || !$opts{'l'});
  pod2usage(1) if($opts{'g'} && $opts{'s'});
  $opts{'m'} = $MIN_PBQ unless(defined $opts{'m'});
  $opts{'q'} = $MIN_MAPQ unless(defined $opts{'q'});
  return \%opts;
}
__END__
=head1 NAME
alleleCounter.pl - Generate tab separated file with allelic counts and depth for each specified locus.
=head1 SYNOPSIS
Where possible use the C version for large data (it's also more configurable).
alleleCounts.pl
Required:
-bam -b BAM/CRAM file (expects co-located index)
- if CRAM see '-ref'
-output -o Output file [STDOUT]
-loci -l Alternate loci file (just needs chr pos)
- output is different, counts for each residue
Optional:
-ref -r genome.fa, required for CRAM (with colocated .fai)
-minqual -m Minimum base quality to include (integer) [30]
-mapqual -q Minimum mapping quality of read (integer) [35]
-gender -g flag, presence indicates loci file to be treated as gender SNPs.
- cannot be used with 's'
-snp6 -s flag, presence indicates loci file is SNP6 format.
- cannot be used with 'g'
- changes output format
-help -h This message
-version -v Version number
=cut
alleleCount-4.3.0/perl/bin/alleleCounterToJson.pl 0000775 0000000 0000000 00000005204 14073306636 0022003 0 ustar 00root root 0000000 0000000 #!/usr/bin/perl
##########LICENCE##########
# Copyright (c) 2014-2021 Genome Research Ltd.
#
# Author: CASM/Cancer IT
#
# This file is part of alleleCount.
#
# alleleCount is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
##########LICENCE##########
use strict;
use Carp;
use English qw( -no_match_vars );
use warnings FATAL => 'all';
use Getopt::Long 'GetOptions';
use Pod::Usage;
use Sanger::CGP::AlleleCount;
use Sanger::CGP::AlleleCount::ToJson;
# Script entry point: parse the command line, fall back to /dev/stdout when no
# output option was given at all, then hand the options to run().
{
  my $options = option_builder();
  if(!defined $options->{'o'}) {
    $options->{'o'} = '/dev/stdout';
  }
  run($options);
}
# Converts the allele count file (option a) to JSON using the locus file
# (option l) and writes the resulting string to the output named by option o.
# When option o is false the JSON is written to STDOUT instead. That case
# arises when -o is given without a value: the option takes an optional string,
# which yields an empty value, and there would otherwise be no handle to print
# to.
# Croaks if the output cannot be opened, written or closed.
sub run {
  my ($options) = @_;
  my $json_string = Sanger::CGP::AlleleCount::ToJson::alleleCountToJson($options->{'a'}, $options->{'l'});
  my $out_path = $options->{'o'};

  if(!$out_path){
    print {*STDOUT} "$json_string" or croak("Error writing JSON output: $!");
    return;
  }

  open(my $OUT, '>', $out_path) or croak("Error opening file for output: $!");
  print {$OUT} "$json_string" or croak("Error writing JSON output: $!");
  # Buffered write errors only surface at close, so check it.
  close($OUT) or croak("Error closing output file for JSON conversion: $!");
  return;
}
# Parses @ARGV and returns a hash reference keyed by the single-letter option
# names (h, l, a, o, v).
#  - Unparseable command lines print the usage and exit with status 2.
#  - -h prints the usage and exits 0; -v prints the module version and exits.
#  - -l and -a are mandatory (usage, exit status 1) and must name existing
#    files, otherwise the sub croaks.
#  - -o takes an optional value; given without one it is left as an empty
#    string.
sub option_builder {
  my %opts;
  GetOptions(
    'h|help' => \$opts{'h'},
    'l|locus-file=s' => \$opts{'l'},
    'a|allelecount-file=s' => \$opts{'a'},
    'o|output-file:s' => \$opts{'o'},
    'v|version' => \$opts{'v'},
  ) or pod2usage(2);

  pod2usage(0) if($opts{'h'});
  if($opts{'v'}){
    print Sanger::CGP::AlleleCount->VERSION."\n";
    exit;
  }
  pod2usage(1) if(!$opts{'l'} || !$opts{'a'});
  croak("Locus file ".$opts{'l'}." does not exist.") if(! -e $opts{'l'});
  croak("Allele count output file ".$opts{'a'}." does not exist.") if(! -e $opts{'a'});
  return \%opts;
}
__END__
=head1 NAME
alleleCounterToJson.pl - Generate JSON format file from the tab separated format
=head1 SYNOPSIS
alleleCounterToJson.pl
Required:
-locus-file -l File containing SNP positions used for allelecounter
-allelecount-file -a Allelecounter output file
Optional:
-output-file -o Output file (default: stdout)
-help -h This message
-version -v Version number
=cut
alleleCount-4.3.0/perl/docs.tar.gz 0000664 0000000 0000000 00000065403 14073306636 0017026 0 ustar 00root root 0000000 0000000 TtZ =ks۶_(i,7z%ٲsMs'mOHPbWAҎn& EJ-;4$XRo-^OPSNovZ4]:fDÈrBpߏkw9?&0h1qD&Aa[nM_#'jaxc4zON>v(,/7$;$041310pom݄EE7fkuJ@
# 9َM g4F