pax_global_header00006660000000000000000000000064142030254470014513gustar00rootroot0000000000000052 comment=2b3048acb669665806344fe3b230a0cf2bc2ddb6 megadepth-1.2.0/000077500000000000000000000000001420302544700134515ustar00rootroot00000000000000megadepth-1.2.0/.github/000077500000000000000000000000001420302544700150115ustar00rootroot00000000000000megadepth-1.2.0/.github/workflows/000077500000000000000000000000001420302544700170465ustar00rootroot00000000000000megadepth-1.2.0/.github/workflows/build.yml000066400000000000000000000013441420302544700206720ustar00rootroot00000000000000name: build on: push: branches: [ master ] pull_request: branches: [ master ] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 # Install Dependencies #from https://github.com/danielecook/seq-collection/blob/master/.github/workflows/build.yml - name: Install dependencies (Linux) #if: runner.os == 'ubuntu-latest' run: | sudo apt-get update sudo apt-get -qy install make build-essential cmake libncurses-dev ncurses-dev libbz2-dev lzma-dev liblzma-dev \ curl libssl-dev libtool autoconf automake libcurl4-openssl-dev - name: build run: /bin/bash -x ./build_megadepth.sh - name: test run: /bin/bash tests/test.sh megadepth-1.2.0/.gitignore000066400000000000000000000001431420302544700154370ustar00rootroot00000000000000.idea build cmake-build-debug docopt *.zip .DS_Store bamcount bamcount-debug megadepth-debug attic megadepth-1.2.0/.gitmodules000066400000000000000000000005211420302544700156240ustar00rootroot00000000000000[submodule "htslib"] path = htslib_ci url = https://github.com/samtools/htslib [submodule "libBigWig"] path = libBigWig_ci url = https://github.com/dpryan79/libBigWig [submodule "libdeflate"] path = libdeflate_ci url = https://github.com/ebiggers/libdeflate [submodule "zlib"] path = zlib_ci url = https://github.com/madler/zlib 
megadepth-1.2.0/CMakeLists.txt.ci000066400000000000000000000027271420302544700166130ustar00rootroot00000000000000cmake_minimum_required(VERSION 2.6) set( CMAKE_VERBOSE_MAKEFILE on ) project(megadepth) set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -DMEGADEPTH_VERSION=\"\\\"`cat ../VERSION`\\\"\"") add_executable(megadepth_dynamic megadepth.cpp) add_executable(megadepth_static megadepth.cpp) add_executable(megadepth_statlib megadepth.cpp) include_directories(libdeflate htslib libBigWig) find_package(Threads REQUIRED) if(THREADS_HAVE_PTHREAD_ARG) set_property(TARGET megadepth_dynamic PROPERTY COMPILE_OPTIONS "-pthread") set_property(TARGET megadepth_dynamic PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread") endif() if(CMAKE_THREAD_LIBS_INIT) target_link_libraries(megadepth_dynamic "${CMAKE_THREAD_LIBS_INIT}") endif() target_link_libraries(megadepth_dynamic z hts BigWig -L${CMAKE_SOURCE_DIR}/htslib -L${CMAKE_SOURCE_DIR}/libBigWig) #requires static libraries for both zlib and pthread target_link_libraries(megadepth_static -static ${CMAKE_SOURCE_DIR}/htslib/libhts.a ${CMAKE_SOURCE_DIR}/libBigWig/libBigWig.a ${CMAKE_SOURCE_DIR}/zlib/libz.a -lpthread ${CMAKE_SOURCE_DIR}/libdeflate/libdeflate.a) #this build a dynamic binary, but with htslib, libBigWig, and libz statically linked in, used for MacOS build #remember order is backwards, earliest needed libraries go *last* target_link_libraries(megadepth_statlib ${CMAKE_SOURCE_DIR}/htslib/libhts.a ${CMAKE_SOURCE_DIR}/libBigWig/libBigWig.a -lz -lcurl -lpthread ${CMAKE_SOURCE_DIR}/libdeflate/libdeflate.a) megadepth-1.2.0/CMakeLists.txt.static000066400000000000000000000016051420302544700175010ustar00rootroot00000000000000cmake_minimum_required(VERSION 2.6) set( CMAKE_VERBOSE_MAKEFILE on ) project(megadepth_static) set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -DMEGADEPTH_VERSION=\"\\\"`cat ../VERSION`\\\"\"") add_executable(megadepth_static megadepth.cpp) 
include_directories(libdeflate htslib_static libBigWig_static /hbb_shlib/include) #to work with HBB to statically link libcurl & friends set(CMAKE_EXE_LINKER_FLAGS "-static-libgcc -static-libstdc++") target_link_libraries(megadepth_static -static ${CMAKE_SOURCE_DIR}/htslib_static/libhts.a ${CMAKE_SOURCE_DIR}/libBigWig_static/libBigWig.a /hbb_shlib/lib/libz.a -lpthread ${CMAKE_SOURCE_DIR}/libdeflate/libdeflate.a) #target_link_libraries(megadepth_static -static ${CMAKE_SOURCE_DIR}/htslib/libhts.a ${CMAKE_SOURCE_DIR}/libBigWig/libBigWig.a ${CMAKE_SOURCE_DIR}/zlib/libz.a -lpthread ${CMAKE_SOURCE_DIR}/libdeflate/libdeflate.a) megadepth-1.2.0/CMakeLists.txt.statlib000066400000000000000000000013731420302544700176560ustar00rootroot00000000000000cmake_minimum_required(VERSION 2.6) set( CMAKE_VERBOSE_MAKEFILE on ) project(megadepth) set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -std=c++11 -DMEGADEPTH_VERSION=\"\\\"`cat ../VERSION`\\\"\"") add_executable(megadepth megadepth.cpp) include_directories(/hbb_shlib/include libdeflate htslib libBigWig) #to work with HBB to statically link libcurl & friends set(CMAKE_EXE_LINKER_FLAGS "-static-libgcc -static-libstdc++") target_link_libraries(megadepth ${CMAKE_SOURCE_DIR}/htslib/libhts.a ${CMAKE_SOURCE_DIR}/libBigWig/libBigWig.a ${CMAKE_SOURCE_DIR}/libdeflate/libdeflate.a /hbb_shlib/lib/libcurl.a /hbb_shlib/lib/libz.a -L/opt/rh/devtoolset-8/root/usr/lib/gcc/x86_64-redhat-linux/8 -L/hbb_shlib/lib -lm -lssl -lcrypto -ldl -lrt) megadepth-1.2.0/CMakeLists.txt.windows000066400000000000000000000023461420302544700177070ustar00rootroot00000000000000cmake_minimum_required(VERSION 2.6) set( CMAKE_VERBOSE_MAKEFILE on ) project(megadepth) set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -DMEGADEPTH_VERSION=\"\\\"`cat ../VERSION`\\\"\"") ##windows build related from here on #SET(CMAKE_SYSTEM_NAME Windows) #set(CMAKE_TRY_COMPILE_TARGET_TYPE "STATIC_LIBRARY") set(CMAKE_CXX_FLAGS 
"${CMAKE_CXX_FLAGS} -DCURL_STATICLIB") add_executable(megadepth_static.exe megadepth.cpp getline.c) include_directories(megadepth_static.exe libcurl_windows/include zlib_windows htslib_windows libBigWig_windows) #set(CMAKE_EXE_LINKER_FLAGS "-static-libgcc -static-libstdc++") target_link_libraries(megadepth_static.exe -static ${CMAKE_SOURCE_DIR}/htslib_windows/libhts.a ${CMAKE_SOURCE_DIR}/libBigWig_windows/libBigWig.a ${CMAKE_SOURCE_DIR}/libdeflate_windows/libdeflate.a ${CMAKE_SOURCE_DIR}/libcurl_windows/lib/libcurl.a ${CMAKE_SOURCE_DIR}/nghttp2/lib/libnghttp2.a ${CMAKE_SOURCE_DIR}/openssl/lib/libssl.a ${CMAKE_SOURCE_DIR}/openssl/lib/libcrypto.a ${CMAKE_SOURCE_DIR}/libssh2/lib/libssh2.a ${CMAKE_SOURCE_DIR}/brotli/lib/libbrotlidec-static.a ${CMAKE_SOURCE_DIR}/brotli/lib/libbrotlicommon-static.a ${CMAKE_SOURCE_DIR}/zlib_windows/libz.a -lcrypt32 -lwldap32 -pthread -lwsock32 -lws2_32) megadepth-1.2.0/Dockerfile.build000066400000000000000000000003101420302544700165330ustar00rootroot00000000000000FROM centos:7 RUN yum update -y && yum install -y cmake wget gcc gcc-c++ bzip2 autoconf zlib-devel make zip unzip curl-devel ADD entry.sh /entry.sh RUN chmod a+x /entry.sh ENTRYPOINT ["/entry.sh"] megadepth-1.2.0/Dockerfile.osxcross000066400000000000000000000030011420302544700173170ustar00rootroot00000000000000FROM ubuntu RUN DEBIAN_FRONTEND=noninteractive apt-get update -yy && \ DEBIAN_FRONTEND=noninteractive apt-get install -yy gcc g++ libz-dev make unzip zip zlib1g-dev clang \ libmpc-dev \ libmpfr-dev \ libgmp-dev \ cmake \ automake \ bison \ curl \ file \ flex \ git \ libtool \ pkg-config \ texinfo \ vim \ wget # Install osxcross # NOTE: The Docker Hub's build machines run varying types of CPUs, so an image # built with `-march=native` on one of those may not run on every machine - I # ran into this problem when the images wouldn't run on my 2013-era Macbook # Pro. As such, we remove this flag entirely. 
ENV OSXCROSS_SDK_VERSION 10.8 RUN mkdir /opt/osxcross && \ cd /opt && \ git clone https://github.com/tpoechtrager/osxcross.git && \ cd osxcross && \ ./tools/get_dependencies.sh && \ curl -L -o ./tarballs/MacOSX${OSXCROSS_SDK_VERSION}.sdk.tar.xz \ https://s3.amazonaws.com/andrew-osx-sdks/MacOSX${OSXCROSS_SDK_VERSION}.sdk.tar.xz && \ yes | PORTABLE=true ./build.sh && \ ./build_gcc.sh ENV PATH $PATH:/opt/osxcross/target/bin CMD /bin/bash megadepth-1.2.0/Dockerfile.run000066400000000000000000000003011420302544700162400ustar00rootroot00000000000000FROM centos:8 RUN yum update -y --skip-broken && yum install --skip-broken -y wget zlib zip unzip libcurl ADD megadepth_statlib /megadepth RUN chmod a+x /megadepth ENTRYPOINT ["/megadepth"] megadepth-1.2.0/Dockerfile.windows000066400000000000000000000016271420302544700171420ustar00rootroot00000000000000FROM ubuntu:latest RUN DEBIAN_FRONTEND=noninteractive apt-get update -yy && \ DEBIAN_FRONTEND=noninteractive apt-get install -yy gcc g++ libz-dev libcurl4-openssl-dev make unzip zip zlib1g-dev clang g++-mingw-w64 g++-mingw-w64-i686 g++-mingw-w64-x86-64 gcc-mingw-w64 gcc-mingw-w64-base gcc-mingw-w64-i686 gcc-mingw-w64-x86-64 mingw-w64 mingw-w64-common mingw-w64-i686-dev mingw-w64-tools mingw-w64-x86-64-dev binutils-mingw-w64 binutils-mingw-w64-i686 binutils-mingw-w64-x86-64 \ libmpc-dev \ libmpfr-dev \ libgmp-dev \ cmake \ automake \ bison \ curl \ file \ flex \ git \ libtool \ pkg-config \ texinfo \ vim \ wget CMD /bin/bash megadepth-1.2.0/LICENSE.txt000066400000000000000000000022201420302544700152700ustar00rootroot00000000000000The MIT License Copyright (c) 2018- Christopher Wilks and Ben Langmead Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 
permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. megadepth-1.2.0/README.md000066400000000000000000000530341420302544700147350ustar00rootroot00000000000000![Megadepth_logo](megadepth_logo.png) [![Join the chat at https://gitter.im/megadepth/community](https://badges.gitter.im/megadepth/community.svg)](https://gitter.im/megadepth/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) ![build](https://github.com/ChristopherWilks/megadepth/workflows/build/badge.svg) BigWig and BAM/CRAM related utilities [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/megadepth/README.html) We recommend use of one of the pre-compiled binaries (only x86_64): * [Linux](https://github.com/ChristopherWilks/megadepth/releases/download/1.1.0/megadepth)* * [MacOS](https://github.com/ChristopherWilks/megadepth/releases/download/1.1.0/megadepth_macos) * [Windows Native](https://github.com/ChristopherWilks/megadepth/releases/download/1.1.0/megadepth.exe) *this binary has the HTSlib, libBigWig, libcurl, libdeflate, & zlib libraries statically linked. 
Megadepth is also available under [R/Bioconductor](http://www.bioconductor.org/packages/release/bioc/html/megadepth.html) If none of those options work, the a Docker image and build instructions are described at the end of this README. If you find Megadepth useful, please cite the publication: `Wilks, C, Ahmed, O, Baker, DN, Zhang, D, Collado-Torres, L, Langmead, B (2021). Megadepth: efficient coverage quantification for BigWigs and BAMs. Bioinformatics` [Releases prior to 1.0.2 used the previous name "bamcount"] # Usage For any remote file processing, either BAM or BigWigs, you *must* use the `--prefix ` option. Additionally, when using `--annotation`, `--op ` can be used to change the mode of summary: * BAMs, `` can be `sum` (default) or `mean` * BigWigs, `` can be `sum` (default), `mean`, `min`, or `max` ## BigWig Processing ``` megadepth /path/to/bigwigfile --annotation --op ``` Concrete example command for sample `SRR1258218` (NA12878 Illumina RNA-seq), this will produce 1) means for the intervals listed in `exons.bed` and 2) the total annotated AUC (output `STDOUT`): ``` megadepth SRR1258218.bw --annotation exons.bed --op mean --auc ``` Or if you only want the AUC for the whole BigWig: ``` megadepth SRR1258218.bw ``` ## BAM/CRAM processing While megadepth doesn't require a BAM/CRAM index file (typically `.bam.bai` or `.bam.crai`) to run, it *does* require that the input BAM be sorted by chromosome at least. This is because megadepth allocates a per-base counts array across the entirety of the current chromosome before processing the alignments from that chromosome. If reads alignments are not grouped by chromosome in the BAM, undefined behavior will occur including massive slow downs and/or memory allocations. A BAM/CRAM index file is recommended for best performance on sparse regions when `--annotation ` is used. 
If a CRAM file is being processed, the reference FASTA may be retrieved from an external webserver (default) or specified with `--fasta /path/to/reference.fa`. Read alignments can be filtered in (inclusion) via `--filter-in ` or filtered out (exclusion) via `--filter-out `, where `` is as bitmask according to the SAM specification in decimal. The defaults are `--filter-in 65535` and `--filter-out 260` to skip only unmapped and secondary alignments, processing everything else. Concrete example command for sample `SRR1258218` (NA12878 Illumina RNA-seq): ``` megadepth SRR1258218.sorted.bam --threads 4 --bigwig --auc --annotation exons.bed --prefix SRR1258218 ``` If you only want to get a coverage summary (either sum or mean) over a set of intervals, you may see a performance boost if you have a BAM index at the same path as the BAM file: ``` megadepth SRR1258218.sorted.bam --annotation exons.bed --prefix SRR1258218 --gzip ``` Also, the optional `--gzip` flag in the above example will automatically turn off writing to `STDOUT` any coverage (either base or annotation), and will instead write coverage to block gzipped files using the `--prefix` or input filename as the base filename. These block gzipped files will also have a Tabix-like index `.csi` built for them as well. There's a known bug where chromosomes with 0 coverage are still reported in the block-gzipped files but are not indexed. # BAM/CRAM Processing Subcommands ![BAM Processing](megadepth_detailed.png) For any and all subcommands below, if run together, `megadepth` will do only one pass through the BAM file. While any given subcommand may not be particularly fast on its own, doing them all together can save time. Subcommand `--bigwig` is the only subcommand that will output a BigWig file with the suffix `.coverage.bw`. If `--min-unique-qual` and `--bigwig` are specified the "unique" coverage will also be written to a separate BigWig file with the suffix `.unique.bw`. 
Also, `--bigwig` will not work on Windows, megadepth as of release 1.0.5 will simply skip writing a BigWig if this option is passed in with the Windows build, but will process other options which still make sense (e.g. `--auc`). ## Coverage over regions ### `megadepth /path/to/bamfile --annotation ` generates per-base counts across all regions in `` file. This will use the BAM index file (.bai) if it exists, otherwise it will do a linear walkthrough the BAM, but only reporting the coverage for the regions specified (typically much slower w/o an index). The annotation BED file does not need to be sorted in any particular way. megadepth will output the summed coverages for the annotation in contiguous blocks per chromosome. This will be the same order as the BED file *if* coordinates from the same chromosome are contiguous in the BED file (typically they are). You can skip the index with `--no-index` in cases where the regions cover nearly the whole genome (can be faster than jumping around the index). ### `megadepth /path/to/bamfile --annotation ` generates coverage sums over a specified number of base pair length contiguous windows of the genome (e.g. 400 bp). All subcommands here will default to reporting to `STDOUT` unless `--no-annotation-stdout` or `--gzip` is passed in. ## Coverage over the whole genome There's multiple ways to get whole genome, per-base coverage: ### `megadepth /path/to/bamfile --coverage` Generates per-base counts of overlapping reads across all bases of the genome. All coverage is included, even mismatching bases. So this output should be thought of as ref base + alternate base sums (if using `--alts` in addition, see below). Typically this is used to produce a BigWig, but can be used w/o the `--bigwig` option to just output TSVs Will default to reporting to `STDOUT` unless `--no-coverage-stdout` or `--gzip` is passed in. 
By default, `--coverage` and `--bigwig` (below) will not double count coverage where paired-end reads overlap (same as `Mosdepth`'s default). However, double counting can be allowed with the `--double-count` option, which may result in faster running times if precise counting is not needed. ### `megadepth /path/to/bamfile --bigwig` Outputs coverage (same as `--coverage) except as BigWig file(s) instead of TSVs (including for `--min-unique-qual` option), this is an alterate subcommand to `--coverage`. ### `megadepth /path/to/bamfile --auc` Reports area-under-coverage across all bases (one large sum of overlapping reads, per-base). This will also report additional counts for: * `min-unique-qual` only for reads with MAPQ >= to this setting * `--annotation`: only for bases in the annotated regions This computes the coverage (same as `--coverage` and `--bigwig`) under the hood, but won't output it unless `--coverage` or `--bigwig` is also passed in. Will default to reporting to `STDOUT` unless `--no-auc-stdout` is passed in. ## Fragment Length Distribution ### `megadepth /path/to/bamfile --frag-dist` Outputs fragment length distribution adjusting for intron lengths. Mean, mode statistics are reported at the end of the output with string tag `STATS`. This uses the absolute value of the `TLEN` field but uses additional filters similar to [csaw](https://github.com/LTLA/csaw)'s fragment length calculation. The following alignments are filtered out: * secondary * supplementary * not paired * unmapped * mate unmapped * discordant (mates not on same chromosome/reference) Further, read mates must be on forward/reverse strands and the forward mate must not be downstream of the reverse mate. Intron length(s) in the paired alignments are also subtracted from the `TLEN` field except where the `TLEN` field is smaller than the combined length of the introns, in which case the `TLEN` is reported as is. These numbers should be taken as an estimation of the fragment length distribtion. 
Reports to a file with suffix `.frags.tsv`. ## Alternate Base Coverage ### `megadepth /path/to/bamfile --alts` Outputs information about non-reference-matching portions of reads. Output is comma separated with 4 fields: | Pos | Descrtiption | |-------|------------------------------------------------------------------------| | 1 | Reference/chromosome ID in the BAM file (integer) | | 2 | POS field (0-based offset of leftmost aligned ref base) | | 3 | Operation label (see table below) | | 4 | Extra info (see table below) | | 5 | Read ID/name if paired alignment overlaps between mates (blank if not) | | 6 | Intentionally left blank | As field 6 is always blank, there will always be a trailing ',' for X, I, and D records. These could be of a few types, summarized in this table. All of these are available when the `MD:Z` extra flag is present. If not present, only the ones with "Yes" in the "No `MD:Z`" column are reported. | Label | Type | Extra info | No `MD:Z` | |-------|------------|----------------------|------------| | `X` | Mismatch | Read base | No | | `D` | Deletion | # deleted bases | Yes | | `I` | Insertion | Inserted read bases | Yes | | `S` | Soft clip | Soft clipped bases | Yes | | `H` | Hard clip | (nothing) | Yes | | `P` | Padding | (nothing) | Yes | For example, assuming a BAM file produced by the STAR v2.7.3a aligner which has an alignment record at chromosome 11, starting as position 100 (1-base), with a mismatch of a `T` (alt) vs. a `G` (ref) 11 bases from the left starting position of the alignment (inclusive): `... chr11 100 ... MD:Z:10G20` there will be a corresponding line in the output of `--alts` `10,109,X,T,,` where `T` is the base in the read sequence aligned in that record (reference `G`), and 10 is the offset of the chromosome ID from the BAM header (this offset will vary with the reference used to align). The empty field is reserved for the case where the two mates in a read pair have an overlapping alignment. 
In that case the read ID/name is printed in the 5th field to indicate overlap. Typically this will allow for the removal of duplicate alternative base calls and help to inform deciding between conflicting calls where the overlapping mates may not have the same call. This only applies to mismatches ('X'), insertions ('I'), and deletions ('D'), not to soft-clipping (below). If coverage is not being computed (`--bigwig` or `--coverage`) as well, overlapping pairs will *not* be flagged when `--alts` is used. See the usage message for options, which can selectively disable some of the outputs listed above. E.g. the soft-clipping outputs can be very large, so they're not printed unless `--include-softclip` is specified. Reports to a file with suffix `.alts.tsv`. ### `megadepth /path/to/bamfile --alts --include-softclip` In addition to the alternate base output, this reports the bases that were softclipped at the ends (start/end) of the read. These are bases which are left in the sequence but don't align. The softclipped bases themselves are printed to the file named with the prefix passed into the `--alts` option. The total number of sofclipped bases and the total number of bases from the query sequences of alignments that that aren't unmapped or secondary are reported to the file named with the prefix passed to `--include-softclip`. Warning: using this option w/o modifiers (e.g. `--only-polya`) could blow up the `--alts` output size as the full softclipped sequence is printed in the 4th column in the table above ("Extra info"). Reports to a file with suffix `.softclip.tsv` in addition to the `--alts` file. ### `megadepth /path/to/bamfile --alts --include-softclip --only-polya` If reporting softclipped bases, this option will limit the report to only those bases that have the following: * Count of bases in the sofclip (column 4 below) has to be >= 3 * % of base (A/T) of softclipped bases for an alignment >= 80% No other sofclipped bases are reported. 
Output is comma separated with 7 fields: | Pos | Description| |-------|----------------------------------------------------------------------------------| | 1 | Reference/chromosome ID in the BAM file (integer) | | 2 | POS field (0-based ref offset of either leftmost or rightmost aligned base) | | 3 | Operation label (always "S") | | 4 | Number of bases in the softclip (run length) | | 5 | Intentionally left blank to be compatible with previous alt. format | | 6 | Direction to move from POS ('+' for end of alignment, '-' for start of alignment)| | 7 | Base (A/T) | | 8 | Count of the base in column 6 | ## Junctions ### Reporting All Junctions #### `megadepth /path/to/bamfile --all-junctions` Extracts all intron spanning alignments from the BAM and reports them, one per line. Output format: | Pos | Description| |--------|---------------------------------------------------------------------------------------| | 1 | Read name (for removing duplicate junction reports for overlapping read mates) | | 2 | Reference/chromosome ID in the BAM file (integer) | | 3 | Start coordinate of intron (1-based ref offset of leftmost base of intron) | | 4 | End coordinate of intron (1-based ref offset of rightmost base of intron) | | 5* | Mapping strand of alignment (0 forward, 1 reverse) | | 6 | Cigar string (useful for determining anchor lengths) | | 7 | Is unique alignment? (0:no, 1:yes; needed for counting unique split read support) | \*This is the strand of the alignment, not necessarily the strand of the junction, that has to be determined by the dinucleotide motifs at the coordinates given. However, if the BAM includes the `XS:A` tag for certain alignment records the strand here will be `+` or `-` taken from that tag and then indicates the true strand of source transcript based on if canonical splicing info is available. 
This output can be further processed by the script, `junctions/process_jx_output.sh` to get a merged set of junctions with unique and multi-mapping counts, compatible with STAR's `SJ.out` junction file (sans the last column which is left blank, but would be maximum anchor length per junction if coming from STAR). NOTE: No junction filtering is done by Megadepth here, it will simply report every potential junction it finds in the BAM file, subject to the general alignment filters already in place (e.g. using the SAM flags to filter out umapped reads, secondary alignments, etc...). If comparing with STAR's SJ.out file for the same BAM, be aware that STAR will likely not do the same alignment level filtering, so the results will be different in terms of 1) number of junctions found and 2) split-read counts for a junction found in both approaches. Reports to a file with suffix `.all_jxs.tsv`. ### Co-occurring Junctions #### `megadepth /path/to/bamfile --junctions` Extract locally co-occurring junctions from BAM. This does not extract all potential junctions (use `--all-junctions` for that), only those for which a read (or read pair) had >= 2 junctions. This can be run instead of OR in addition to `--all-junctions`, to specifically report co-occurring junctions. Since the output format below includes co-occurring jx's on the same line, it's potentially useful to run both. In a paired context, there must be at least 2 junctions across the 2 read mates to be output. 
Output is tab separated with 6-12 fields (the last 6 fields are for a 2nd mate if applicable): | Pos | Description| |--------|---------------------------------------------------------------------------------------| | 1 | Reference/chromosome ID in the BAM file (integer) | | 2 | POS field (1-based ref offset of either leftmost base) | | 3 | Mapping strand (0 forward, 1 reverse)** | | 4 | Insert length (0 if not paired) | | 5 | Cigar string (useful for determining anchor lengths) | | 6 | List of junction coordinates (comma-delimited) | | 7 | Is unique alignment? (0:no, 1:yes; needed for counting unique split read support) | | 8* | Mate reference record ID | | 9* | Mate POS field (1-based ref offset of either leftmost base) | | 10* | Mate mapping strand (0 forward, 1 reverse)** | | 11* | Mate insert length (0 if not paired) | | 12* | Mate cigar string (useful for determining anchor lengths) | | 13* | Mate list of junction coordinates (comma-delimited) | | 14* | Mate is unique alignment? (0:no, 1:yes; needed for counting unique split read support)| \*optional, output if a 2nd mate is present and has the required number of junctions. \*\*This is the strand of the alignment, not necessarily the strand of the junction, that has to be determined by the dinucleotide motifs at the coordinates given. However, if the BAM includes the `XS:A` tag for certain alignment records the strand here will be `+` or `-` taken from that tag and then indicates the true strand of source transcript based on if canonical splicing info is available. If you get a core dump when running on longer reads (e.g. BAM's produced by PacBio/Oxford Nanopore sequencing), or something like this abort message: ```*** Error in `megadepth': free(): invalid next size (normal): 0x0000000001d30780 ***``` then try adding the argument `--long-reads` as it will enlarge the buffer used to store the output junction string. This enables megadepth to have a better chance of handling really long CIGAR strings. 
Reports to a file with suffix `.jxs.tsv`. Similar to `--all-junctions`, no junction filtering is done by Megadepth for this option, it will simply report all co-occurring, potential junctions it finds in the BAM file. # Docker Additionally, there is a Docker image that can be used to run `megadepth`: https://quay.io/repository/broadsword/megadepth?tab=tags For running under Docker, you'll probably want to map in a directory on the host system into the container via the `-v` option so you can pass an annotation file in and get output back: ``` docker run -v `pwd`:/data --annotation /data/.bed --prefix /data/output_file_prefix ``` Currently, `libcurl` throws a warning about version information, this can be ignored. # Building ## Build dependencies * [htslib](http://www.htslib.org) * See `get_htslib.sh` for a script that gets a recent version and compiles it with minimal dependencies * [libBigWig](https://github.com/dpryan79/libBigWig) * See `get_libBigWig.sh` for a script that gets a recent version and compiles it * [libdeflate](https://github.com/ebiggers/libdeflate) * See `get_libdeflate.sh` for a script that gets a recent version and compiles it * zlib static library [only if building a static binary] * See `get_zlib.sh` for a script that gets a recent version and compiles the static library ## Building Before attempting to build Megadepth, we strongly suggest the user try to use one of the pre-built binaries or the Docker container provided at the top of this README. The 3 primary dependencies for this build (htslib, libBigWig, libdeflate) are brought in via git's `submodule` system automatically. To build Megadepth on a reasonably modern x86 Linux machine (with `libcurl` and `zlib`), run: `build_megadepth.sh` This builds a fully dynamic binary which requires that libraries for htslib and libBigWig be available in the environment where megadepth will be run. 
We have provided a wrapper script, `megadepth`, which sets the environment to reference those two libraries where they were cloned, so megadepth can be run on the build machine. megadepth-1.2.0/VERSION000066400000000000000000000000061420302544700145150ustar00rootroot000000000000001.2.0 megadepth-1.2.0/build_megadepth.sh000077500000000000000000000051421420302544700171270ustar00rootroot00000000000000#!/usr/bin/env bash set -ex #build dynamic by default build_type=$1 bc=`perl -e '$bt="'$build_type'"; if($bt=~/static/i) { print "megadepth_static"; } else { print "megadepth_dynamic"; }'` #make sure submodules are present git submodule update --init --recursive export SUBMODULE=1 ln -fs CMakeLists.txt.ci CMakeLists.txt #clear symlink main lib dirs rm -rf zlib htslib libBigWig libdeflate build-release-temp if [[ -n $build_type && "$build_type" == "static" ]] ; then if [[ ! -s zlib_ci/libz.a ]] ; then if [[ -n $SUBMODULE ]]; then ln -fs zlib_ci zlib fi ./get_zlib.sh if [[ -z $SUBMODULE ]]; then mv zlib zlib_ci fi fi ln -fs zlib_ci zlib fi if [[ ! -s libdeflate_ci/libdeflate.a ]] ; then if [[ -n $SUBMODULE ]]; then ln -fs libdeflate_ci libdeflate fi ./get_libdeflate.sh if [[ -z $SUBMODULE ]]; then mv libdeflate libdeflate_ci fi fi ln -fs libdeflate_ci libdeflate htslib_to_link="htslib_ci" if [[ -n $build_type && "$build_type" == "static" ]]; then htslib_to_link="htslib_static" fi if [[ ! -s htslib_ci/libhts.so && "$htslib_to_link" != "htslib_static" ]] || [[ ! -s htslib_static/libhts.a && "$htslib_to_link" == "htslib_static" ]]; then export CPPFLAGS="-I../libdeflate" export LDFLAGS="-L../libdeflate -ldeflate" if [[ "$htslib_to_link" == "htslib_static" ]]; then ./get_htslib.sh linux static else if [[ -n $SUBMODULE ]]; then ln -fs htslib_ci htslib fi ./get_htslib.sh linux if [[ -z $SUBMODULE ]]; then mv htslib htslib_ci fi fi export CPPFLAGS= export LDFLAGS= fi ln -fs $htslib_to_link htslib if [[ ! 
-s libBigWig_ci/libBigWig.so ]] ; then if [[ -n $SUBMODULE ]]; then ln -fs libBigWig_ci libBigWig fi ./get_libBigWig.sh if [[ -z $SUBMODULE ]]; then mv libBigWig libBigWig_ci fi pushd libBigWig_ci make clean make -f Makefile.orig lib-shared popd fi ln -fs libBigWig_ci libBigWig #compile a no-curl static version of libBigWig if [[ $bc == 'megadepth_static' ]]; then pushd libBigWig make clean make -f Makefile.nocurl lib-static popd fi export LD_LIBRARY_PATH=./htslib:./libBigWig:$LD_LIBRARY_PATH DR=build-release-temp mkdir -p ${DR} pushd ${DR} cmake -DCMAKE_BUILD_TYPE=Release .. make ${bc} popd cp ${DR}/${bc} ./megadepth_release ./megadepth --version rm -rf ${DR} DR=build-debug-temp mkdir -p ${DR} pushd ${DR} cmake -DCMAKE_BUILD_TYPE=Debug .. make ${bc} popd cp ${DR}/${bc} ./megadepth_debug ./megadepthd --version rm -rf ${DR} megadepth-1.2.0/build_no_container_hbb.sh000077500000000000000000000050011420302544700204540ustar00rootroot00000000000000#!/usr/bin/env bash set -ex #yum install cmake -yy working_dir=$(dirname $0) pushd $working_dir #clear symlink main lib dirs rm -rf zlib htslib libBigWig libdeflate build-release-temp export PATH=/opt/rh/devtoolset-8/root/usr/bin:$PATH export CFLAGS="-g -O2 -fvisibility=hidden -DCURL_STATICLIB -fPIC" export CPPFLAGS="-g -O2 -fvisibility=hidden -I/hbb_shlib/include" export LDFLAGS="-L/hbb_shlib/lib -static-libstdc++" export SHLIB_LDFLAGS="-static-libstdc++" export STATICLIB_CFLAGS="-g -O2 -fvisibility=hidden -fPIC" export STATICLIB_CPPFLAGS="-g -O2 -fvisibility=hidden -I/hbb_shlib/include" build_type=$1 bc=`perl -e '$bt="'$build_type'"; if($bt=~/static/i) { print "megadepth_static"; } else { print "megadepth"; }'` if [[ "$bc" == "megadepth_static" ]]; then ln -fs CMakeLists.txt.static CMakeLists.txt else ln -fs CMakeLists.txt.statlib CMakeLists.txt fi #dont need our own zlib, since it's already statically compiled in HBB if [[ ! 
-s libdeflate_hbb ]] ; then ./get_libdeflate.sh mv libdeflate libdeflate_hbb fi ln -fs libdeflate_hbb libdeflate if [[ ! -s htslib_hbb ]] ; then export CPPFLAGS="$CPPFLAGS -I../libdeflate" #for staticlly linking libcurl export LDFLAGS="-static-libstdc++ -L/hbb_shlib/lib -L../libdeflate" export LIBS="-lm -ldl -lssl -lcrypto -lz -ldeflate -lrt -pthread" ./get_htslib.sh linux hbb export CPPFLAGS="-g -O2 -fvisibility=hidden -I/hbb_shlib/include" export LDFLAGS="-L/hbb_shlib/lib -static-libstdc++" export LIBS= #reset env vars mv htslib htslib_hbb fi ln -fs htslib_hbb htslib if [[ ! -s libBigWig_hbb ]] ; then ./get_libBigWig.sh pushd libBigWig export CFLAGS="-O2 -I/hbb_shlib/include -DCURL_STATICLIB -fPIC" make clean make -f Makefile.fpic lib-static popd mv libBigWig libBigWig_hbb fi ln -fs libBigWig_hbb libBigWig export CFLAGS="-g -O2 -fvisibility=hidden -I/hbb_shlib/include -DCURL_STATICLIB -fPIC" export LDFLAGS="-L/hbb_shlib/lib -static-libstdc++" set -x export LD_LIBRARY_PATH=./htslib:./libBigWig:$LD_LIBRARY_PATH DR=build-release-temp mkdir -p ${DR} pushd ${DR} cmake -DCMAKE_BUILD_TYPE=Release .. make ${bc} popd cp ${DR}/${bc} ./ #ln -fs ./$bc megadepth ./megadepth --version rm -rf ${DR} mv megadepth megadepth.full strip -s megadepth.full -o megadepth DR=build-debug-temp mkdir -p ${DR} pushd ${DR} cmake -DCMAKE_BUILD_TYPE=Debug .. 
make ${bc} popd cp ${DR}/${bc} ./${bc}_hbb_debug #ln -fs ./${bc}_debug megadepth_debug ./megadepth_hbb_debug --version rm -rf ${DR} ln -fs ./megadepth_hbb_debug mddebug megadepth-1.2.0/build_no_container_macos.sh000077500000000000000000000040531420302544700210310ustar00rootroot00000000000000#!/usr/bin/env bash set -ex working_dir=$(dirname $0) pushd $working_dir ln -fs CMakeLists.txt.ci CMakeLists.txt #clear symlink main lib dirs rm -rf zlib htslib libBigWig libdeflate build-release-temp export APPLE_VER=12 OSXCROSS_ROOT=/opt/osxcross/target/bin export PATH=$OSXCROSS_ROOT:$PATH export OSX_ARCH=x86_64 export OSX_CC=o64-gcc export OSX_CXX=o64-g++ export CC=${OSX_CC} export CXX=${OSX_CXX} export AR=${OSXCROSS_ROOT}/$OSX_ARCH-apple-darwin${APPLE_VER}-ar export RANLIB=${OSXCROSS_ROOT}/$OSX_ARCH-apple-darwin${APPLE_VER}-ranlib export compiler=$OSX_ARCH-apple-darwin${APPLE_VER} #only build statlib for macos, no static support: #see https://stackoverflow.com/questions/5259249/creating-static-mac-os-x-c-build bc=megadepth_statlib if [[ ! -s libdeflate_macos ]] ; then ./get_libdeflate.sh $compiler macos fi ln -fs libdeflate_macos libdeflate export CFLAGS="-I../libdeflate" export LDFLAGS="-L../libdeflate -ldeflate" if [[ ! -e htslib_macos/libhts.a ]] ; then ./get_htslib.sh $compiler macos fi ln -fs htslib_macos htslib if [[ ! -s libBigWig_macos ]] ; then ./get_libBigWig.sh macos pushd libBigWig_macos make clean make -f Makefile.fpic lib-static popd fi ln -fs libBigWig_macos libBigWig export CFLAGS= export LDFLAGS= set -x export LD_LIBRARY_PATH=./htslib:./libBigWig:$LD_LIBRARY_PATH DR=build-release-temp mkdir -p ${DR} pushd ${DR} #CC=${OSX_CC} CXX=${OSX_CXX} cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_AR=/opt/osxcross/target/bin/$OSX_ARCH-apple-darwin${APPLE_VER}-ar -DCMAKE_RANLIB=/opt/osxcross/target/bin/$OSX_ARCH-apple-darwin${APPLE_VER}-ranlib -D CMAKE_C_COMPILER=${OSX_CC} -D CMAKE_CXX_COMPILER=${OSX_CXX} .. 
#CC=${OSX_CC} CXX=${OSX_CXX} AR=/opt/osxcross/target/bin/$OSX_ARCH-apple-darwin${APPLE_VER}-ar RANLIB=/opt/osxcross/target/bin/$OSX_ARCH-apple-darwin${APPLE_VER}-ranlib make ${bc} cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_AR=${AR} -DCMAKE_RANLIB=${RANLIB} -D CMAKE_C_COMPILER=${OSX_CC} -D CMAKE_CXX_COMPILER=${OSX_CXX} .. make ${bc} popd cp ${DR}/${bc} ./megadepth_macos #./megadepth_macos --version rm -rf ${DR} megadepth-1.2.0/build_no_container_windows.sh000077500000000000000000000047441420302544700214300ustar00rootroot00000000000000#!/usr/bin/env bash set -ex bc=megadepth_static working_dir=$(dirname $0) pushd $working_dir ln -fs CMakeLists.txt.windows CMakeLists.txt #clear symlink main lib dirs rm -rf zlib htslib libBigWig libdeflate libcurl build-release-temp #.e.g x86_64-w64-mingw32 or i686-w64-mingw32 compiler=x86_64-w64-mingw32 #e.g. 64 (32 doesn't work at this point) arch=64 export CC=${compiler}-gcc export CXX=${compiler}-g++ export AR=${compiler}-ar export RANLIB=${compiler}-ranlib if [[ ! -s mingw-std-threads ]]; then git clone https://github.com/meganz/mingw-std-threads.git fi CURL_VER=7.71.1 ARCH=$arch #32 #64 as of 2020-07-31, 32bit doesn't work due to recv not being linked properly in 32bit mingw libws2_32 if [[ ! -s libcurl_windows ]] ; then for f in curl-${CURL_VER}-win${ARCH}-mingw.zip libssh2-1.9.0-win${ARCH}-mingw.zip openssl-1.1.1g-win${ARCH}-mingw.zip nghttp2-1.41.0-win${ARCH}-mingw.zip brotli-1.0.7-win${ARCH}-mingw.zip zlib-1.2.11-win${ARCH}-mingw.zip; do wget "https://curl.haxx.se/windows/dl-${CURL_VER}/${f}" -O $f unzip $f done ln -fs curl-${CURL_VER}-win${ARCH}-mingw libcurl_windows ln -fs libssh2-1.9.0-win${ARCH}-mingw libssh2 ln -fs openssl-1.1.1g-win${ARCH}-mingw openssl ln -fs nghttp2-1.41.0-win${ARCH}-mingw nghttp2 ln -fs brotli-1.0.7-win${ARCH}-mingw brotli ln -fs zlib-1.2.11-win${ARCH}-mingw zlib_windows fi if [[ ! -s libdeflate_windows ]] ; then ./get_libdeflate.sh $compiler windows fi if [[ ! 
-s htslib_windows ]] ; then export CFLAGS="-I../libdeflate_windows -I../zlib_windows" export LDFLAGS="-L../libdeflate_windows -L../zlib_windows -lz -ldeflate" ./get_htslib.sh $compiler windows export CFLAGS= export LDFLAGS= fi if [[ ! -s libBigWig_windows ]] ; then ./get_libBigWig.sh windows pushd libBigWig_windows export CFLAGS="-I../libcurl_windows/include -I../zlib_windows -g -Wall -O3 -Wsign-compare -DCURL_STATICLIB" make clean make CC=${compiler}-gcc AR=$AR RANLIB=$RANLIB -f Makefile.fpic lib-static export CFLAGS= popd fi set -x DR=build-release-temp mkdir -p ${DR} pushd ${DR} MD_VER=`cat ../VERSION` cmake -DCMAKE_SYSTEM_NAME=Windows -DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY -DCMAKE_CXX_FLAGS="-std=c++11 -DCURL_STATICLIB -DMEGADEPTH_VERSION=$MD_VER -DWINDOWS_MINGW" -DCMAKE_EXE_LINKER_FLAGS="-static-libgcc -static-libstdc++" CC=$CC CXX=$CXX AR=$AR RANLIB=$RANLIB -DCMAKE_BUILD_TYPE=Release .. make ${bc}.exe popd cp ${DR}/${bc}.exe.exe ./megadepth.exe rm -rf ${DR} megadepth-1.2.0/conda/000077500000000000000000000000001420302544700145355ustar00rootroot00000000000000megadepth-1.2.0/conda/megadepth/000077500000000000000000000000001420302544700164735ustar00rootroot00000000000000megadepth-1.2.0/conda/megadepth/CMakeLists.txt000066400000000000000000000014671420302544700212430ustar00rootroot00000000000000cmake_minimum_required(VERSION 2.6) set( CMAKE_VERBOSE_MAKEFILE on ) project(megadepth) set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -DMEGADEPTH_VERSION=\"\\\"`cat ../VERSION`\\\"\"") add_executable(megadepth_dynamic megadepth.cpp) add_executable(megadepth_static megadepth.cpp) add_executable(megadepth_statlib megadepth.cpp) include_directories(include) find_package(Threads REQUIRED) if(THREADS_HAVE_PTHREAD_ARG) set_property(TARGET megadepth_dynamic PROPERTY COMPILE_OPTIONS "-pthread") set_property(TARGET megadepth_dynamic PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread") endif() if(CMAKE_THREAD_LIBS_INIT) 
target_link_libraries(megadepth_dynamic "${CMAKE_THREAD_LIBS_INIT}") endif() target_link_libraries(megadepth_dynamic z hts BigWig deflate -L${CMAKE_SOURCE_DIR}/lib) megadepth-1.2.0/conda/megadepth/LICENSE.txt000066400000000000000000000022131420302544700203140ustar00rootroot00000000000000The MIT License Copyright (c) 2018- Christopher Wilks and Ben Langmead Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
megadepth-1.2.0/conda/megadepth/build.sh000077500000000000000000000017361420302544700201400ustar00rootroot00000000000000#!/usr/bin/env bash #based in part on https://github.com/AnacondaRecipes/libnetcdf-feedstock/blob/master/recipe/build.sh #and on the WiggleTools conda build script: #https://github.com/bioconda/bioconda-recipes/blob/master/recipes/wiggletools/build.sh set -ex declare -a CMAKE_PLATFORM_FLAGS if [[ ${HOST} =~ .*darwin.* ]]; then CMAKE_PLATFORM_FLAGS+=(-DCMAKE_OSX_SYSROOT="${CONDA_BUILD_SYSROOT}") else CMAKE_PLATFORM_FLAGS+=(-DCMAKE_TOOLCHAIN_FILE="${RECIPE_DIR}/cross-linux.cmake") fi if [[ ${DEBUG_C} == yes ]]; then CMAKE_BUILD_TYPE=Debug else CMAKE_BUILD_TYPE=Release fi #build dynamic by default bc="megadepth_dynamic" cd $SRC_DIR mkdir -p include lib cp -r $PREFIX/include/* include/ cp -r $PREFIX/lib/* lib/ ln -fs ${RECIPE_DIR}/CMakeLists.txt ./ DR=build-release-temp mkdir -p ${DR} pushd ${DR} cmake -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${PREFIX} ${CMAKE_PLATFORM_FLAGS[@]} .. 
make ${bc} popd cp $DR/${bc} $PREFIX/bin/megadepth megadepth-1.2.0/conda/megadepth/cross-linux.cmake000066400000000000000000000012171420302544700217640ustar00rootroot00000000000000# this one is important set(CMAKE_SYSTEM_NAME Linux) set(CMAKE_PLATFORM Linux) #this one not so much set(CMAKE_SYSTEM_VERSION 1) # specify the cross compiler set(CMAKE_C_COMPILER $ENV{CC}) # where is the target environment set(CMAKE_FIND_ROOT_PATH $ENV{PREFIX} $ENV{BUILD_PREFIX}/$ENV{HOST}/sysroot) # search for programs in the build host directories set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) # for libraries and headers in the target directories set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) # god-awful hack because it seems to not run correct tests to determine this: set(__CHAR_UNSIGNED___EXITCODE 1) megadepth-1.2.0/conda/megadepth/meta.yaml000066400000000000000000000020741420302544700203100ustar00rootroot00000000000000{% set name = "Megadepth" %} {% set version = "1.0.9b" %} {% set sha256 = "505422fc3bd99a3bb1119a5c615c12120eab56a485a5163d816ed2bf2ba27993" %} # based on WiggleTools recipe package: name: {{ name|lower }} version: {{ version }} build: number: 0 source: url: https://github.com/ChristopherWilks/megadepth/archive/{{ version }}.tar.gz sha256: {{ sha256 }} requirements: build: - {{ compiler('c') }} - {{ compiler('cxx') }} host: - cmake - libbigwig - htslib - zlib - libcurl - libdeflate run: - libbigwig - htslib test: commands: - megadepth --help about: home: https://github.com/ChristopherWilks/megadepth license: MIT license_family: MIT license_file: LICENSE.txt summary: Megadepth is an efficient tool for extracting coverage related information from RNA and DNA-seq BAM and BigWig files. It supports reading whole-genome coverage from BAM files and writing either indexed TSV or BigWig files, as well as efficient region coverage summary over intervals from both types of files. 
megadepth-1.2.0/countdecimaldigit.h000066400000000000000000000073011420302544700173130ustar00rootroot00000000000000/** Copyright (C) 2014 Milo Yip Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
**/ #pragma once #include #ifdef _MSC_VER #include "intrin.h" #endif inline unsigned CountDecimalDigit32(uint32_t n) { #if defined(_MSC_VER) || defined(__GNUC__) static const uint32_t powers_of_10[] = { 0, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 }; #ifdef _MSC_VER unsigned long i = 0; _BitScanReverse(&i, n | 1); uint32_t t = (i + 1) * 1233 >> 12; #elif __GNUC__ uint32_t t = (32 - __builtin_clz(n | 1)) * 1233 >> 12; #endif return t - (n < powers_of_10[t]) + 1; #else // Simple pure C++ implementation if (n < 10) return 1; if (n < 100) return 2; if (n < 1000) return 3; if (n < 10000) return 4; if (n < 100000) return 5; if (n < 1000000) return 6; if (n < 10000000) return 7; if (n < 100000000) return 8; if (n < 1000000000) return 9; return 10; #endif } inline unsigned CountDecimalDigit64(uint64_t n) { #if defined(_MSC_VER) || defined(__GNUC__) static const uint64_t powers_of_10[] = { 0, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000, 100000000000, 1000000000000, 10000000000000, 100000000000000, 1000000000000000, 10000000000000000, 100000000000000000, 1000000000000000000, 10000000000000000000U }; #if __GNUC__ uint32_t t = (64 - __builtin_clzll(n | 1)) * 1233 >> 12; #elif _M_IX86 unsigned long i = 0; uint64_t m = n | 1; if (_BitScanReverse(&i, m >> 32)) i += 32; else _BitScanReverse(&i, m & 0xFFFFFFFF); uint32_t t = (i + 1) * 1233 >> 12; #elif _M_X64 unsigned long i = 0; _BitScanReverse64(&i, n | 1); uint32_t t = (i + 1) * 1233 >> 12; #endif return t - (n < powers_of_10[t]) + 1; #else // Simple pure C++ implementation if (n < 10) return 1; if (n < 100) return 2; if (n < 1000) return 3; if (n < 10000) return 4; if (n < 100000) return 5; if (n < 1000000) return 6; if (n < 10000000) return 7; if (n < 100000000) return 8; if (n < 1000000000) return 9; if (n < 10000000000) return 10; if (n < 100000000000) return 11; if (n < 1000000000000) return 12; if (n < 10000000000000) return 13; if (n < 100000000000000) 
return 14; if (n < 1000000000000000) return 15; if (n < 10000000000000000) return 16; if (n < 100000000000000000) return 17; if (n < 1000000000000000000) return 18; if (n < 10000000000000000000) return 19; return 20; #endif } megadepth-1.2.0/countlut.hpp000066400000000000000000000070141420302544700160410ustar00rootroot00000000000000/** Copyright (C) 2014 Milo Yip Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
**/ #include "countdecimaldigit.h" #include "digitslut.h" // Additional count number of digit pass // Use lookup table of two gDigitsLut uint32_t u32toa_countlut(uint32_t value, char* buffer, char delim) { unsigned digit = CountDecimalDigit32(value); buffer += digit; *buffer = delim; //char* buffer_end = buffer; while (value >= 100) { const unsigned i = (value % 100) << 1; value /= 100; *--buffer = gDigitsLut[i + 1]; *--buffer = gDigitsLut[i]; } if (value < 10) { *--buffer = char(value) + '0'; } else { const unsigned i = value << 1; *--buffer = gDigitsLut[i + 1]; *--buffer = gDigitsLut[i]; } //return buffer_end; return digit; } void i32toa_countlut(int32_t value, char* buffer) { uint32_t u = static_cast(value); if (value < 0) { *buffer++ = '-'; u = ~u + 1; } u32toa_countlut(u, buffer, '\0'); } void u64toa_countlut(uint64_t value, char* buffer) { unsigned digit = CountDecimalDigit64(value); buffer += digit; *buffer = '\0'; while (value >= 100000000) { const uint32_t a = static_cast(value % 100000000); value /= 100000000; const uint32_t b = a / 10000; const uint32_t c = a % 10000; const uint32_t b1 = (b / 100) << 1; const uint32_t b2 = (b % 100) << 1; const uint32_t c1 = (c / 100) << 1; const uint32_t c2 = (c % 100) << 1; buffer -= 8; buffer[0] = gDigitsLut[b1]; buffer[1] = gDigitsLut[b1 + 1]; buffer[2] = gDigitsLut[b2]; buffer[3] = gDigitsLut[b2 + 1]; buffer[4] = gDigitsLut[c1]; buffer[5] = gDigitsLut[c1 + 1]; buffer[6] = gDigitsLut[c2]; buffer[7] = gDigitsLut[c2 + 1]; } uint32_t value32 = static_cast(value); while (value32 >= 100) { const unsigned i = static_cast(value32 % 100) << 1; value32 /= 100; *--buffer = gDigitsLut[i + 1]; *--buffer = gDigitsLut[i]; } if (value32 < 10) { *--buffer = char(value32) + '0'; } else { const unsigned i = static_cast(value32) << 1; *--buffer = gDigitsLut[i + 1]; *--buffer = gDigitsLut[i]; } } void i64toa_countlut(int64_t value, char* buffer) { uint64_t u = static_cast(value); if (value < 0) { *buffer++ = '-'; u = ~u + 1; } 
u64toa_countlut(u, buffer); } megadepth-1.2.0/create_docker_to_run_megadepth.sh000077500000000000000000000005641420302544700222130ustar00rootroot00000000000000mkdir -p docker.run.build rsync -av megadepth_statlib docker.run.build/ rsync -av Dockerfile.run docker.run.build/Dockerfile pushd docker.run.build/ VER=$(cat ../VERSION) docker build --tag quay.io/broadsword/megadepth:${VER} --tag quay.io/broadsword/megadepth:latest . popd docker push quay.io/broadsword/megadepth:${VER} docker push quay.io/broadsword/megadepth:latest megadepth-1.2.0/digitslut.h000066400000000000000000000036721420302544700156420ustar00rootroot00000000000000/** Copyright (C) 2014 Milo Yip Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
**/ #pragma once const char gDigitsLut[200] = { '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9', '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9', '2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9', '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9', '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9', '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9', '6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9', '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9', '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9', '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9' }; megadepth-1.2.0/docker_build.sh000077500000000000000000000000611420302544700164330ustar00rootroot00000000000000#!/usr/bin/env bash docker build -t megadepth . megadepth-1.2.0/entry.sh000077500000000000000000000011751420302544700151550ustar00rootroot00000000000000#!/usr/bin/env bash set -e if [[ ! -d /code ]] ; then echo "Must mount megadepth directory at /code inside container" fi cd /code rm -rf htslib libBigWig ./get_htslib.sh ./get_libBigWig.sh set -x DR=build-release-temp mkdir -p ${DR} pushd ${DR} cmake -DCMAKE_BUILD_TYPE=Release .. make ldd -v megadepth ./megadepth --help popd cp ${DR}/megadepth ./megadepth rm -rf ${DR} DR=build-debug-temp mkdir -p ${DR} pushd ${DR} cmake -DCMAKE_BUILD_TYPE=Debug .. 
make ldd -v megadepth ./megadepth --help popd cp ${DR}/megadepth ./megadepth-debug rm -rf ${DR} zip megadepth.zip megadepth megadepth-debug #rm -f megadepth megadepth-debug megadepth-1.2.0/get_htslib.sh000077500000000000000000000035661420302544700161460ustar00rootroot00000000000000#!/usr/bin/env bash set -ex #macos: # x86_64-apple-darwin12 (CC=o64-gcc) #OR # i386-apple-darwin1 (CC=o32-gcc) #windows: # x86_64-w64-mingw32 (64bit) #OR # i686-w64-mingw32 (32bit) compiler=$1 #"macos" or "windows" (or nothing for normal linux build) platform=$2 target_dir=htslib if [[ -n $platform && "$platform" != "hbb" ]]; then target_dir="htslib_"${platform} fi #VER=1.9 #VER=1.10.2 VER=1.11 #in case we're not using git submodules to pull dependencies if [[ -z $SUBMODULE ]]; then ar=htslib-${VER}.tar.bz2 if [[ ! -s $target_dir ]] ; then curl -OL https://github.com/samtools/htslib/releases/download/${VER}/${ar} bzip2 -dc ${ar} | tar xvf - rm -f ${ar} mv htslib-${VER} $target_dir fi fi pushd $target_dir autoheader autoconf make clean if [[ "$compiler" == "linux" ]]; then if [[ "$platform" == "hbb" ]]; then ./configure --enable-libcurl --disable-bz2 --disable-lzma --with-libdeflate make libhts.a else ./configure --disable-libcurl --disable-bz2 --disable-lzma --with-libdeflate make fi else if [[ "$platform" == "macos" ]]; then ./configure --enable-plugins --enable-libcurl --disable-bz2 --disable-lzma --with-libdeflate --host=$compiler #inherit CC, AR, and RANLIB from build_no_container_xcross.sh echo $CC echo $AR echo $RANLIB #only make static lib for cross-compilation for now #export CC=/opt/osxcross/target/bin/${compiler}-gcc #export AR=/opt/osxcross/target/bin/${compiler}-ar #export RANLIB=/opt/osxcross/target/bin/${compiler}-ranlib else # windows ./configure --disable-bz2 --disable-lzma --with-libdeflate --host=$compiler export CC=${compiler}-gcc export AR=${compiler}-ar export RANLIB=${compiler}-ranlib fi make CC=$CC AR=$AR RANLIB=$RANLIB libhts.a fi popd 
megadepth-1.2.0/get_libBigWig.sh000077500000000000000000000014611420302544700165100ustar00rootroot00000000000000#!/usr/bin/env bash set -ex platform=$1 target_dir=libBigWig #in case we're not using git submodules to pull dependencies if [[ -z $SUBMODULE ]]; then if [[ -n $platform ]]; then target_dir="libBigWig_"${platform} fi VER=0.4.4 TARGZ=${VER}.tar.gz FN=libBigWig-${TARGZ} DIR=libBigWig-${VER} curl -L https://github.com/dpryan79/libBigWig/archive/${TARGZ} > $FN tar -zxvf $FN rm -f ${FN} mv $DIR $target_dir fi pushd $target_dir cp Makefile Makefile.orig cat Makefile.orig | perl -ne 'chomp; $f=$_; $f=~s/^(CFLAGS.+)$/$1 -DNOCURL/; $f=~s/^(LIBS =.*)(-lcurl)/$1/; $f=~s/^(LDFLAGS.+)=/$1\?=/; print "$f\n";' > Makefile.nocurl cat Makefile.orig | perl -ne 'chomp; $f=$_; $f=~s/^(CFLAGS \?= )$/$1 -fPIC /; $f=~s/^(LDFLAGS.+)=/$1\?=/; print "$f\n";' > Makefile.fpic popd megadepth-1.2.0/get_libdeflate.sh000077500000000000000000000014751420302544700167510ustar00rootroot00000000000000#!/usr/bin/env bash set -ex compiler=$1 platform=$2 target_dir=libdeflate if [[ -z $SUBMODULE ]]; then if [[ -n $platform ]]; then target_dir="libdeflate_"${platform} fi VER=1.6 TARGZ=${VER}.tar.gz FN=libdeflate-${TARGZ} DIR=libdeflate-${VER} curl -L https://github.com/ebiggers/libdeflate/archive/v${TARGZ} > $FN tar -zxvf $FN rm -f ${FN} mv $DIR $target_dir fi pushd $target_dir target="libdeflate.a" if [[ -z $compiler ]]; then #from https://github.com/samtools/htslib/issues/688 make CFLAGS="$CFLAGS -fPIC -O3" $target else if [[ "$platform" == "windows" ]]; then make CC=${compiler}-gcc CFLAGS='-O3' libdeflatestatic.lib ln -fs libdeflatestatic.lib libdeflate.a else make CC=${compiler}-gcc CFLAGS='-fPIC -O3' $target fi fi popd megadepth-1.2.0/get_zlib.sh000077500000000000000000000004601420302544700156070ustar00rootroot00000000000000#!/usr/bin/env bash set -ex VER=1.2.11 if [[ -z $SUBMODULE ]]; then ar=zlib-${VER}.tar.gz wget https://www.zlib.net/${ar} gzip -dc ${ar} | tar xvf - rm -f ${ar} pushd 
zlib-${VER} else pushd zlib fi ./configure make popd if [[ -z $SUBMODULE ]]; then mv zlib-${VER} zlib fi megadepth-1.2.0/getline.c000066400000000000000000000023101420302544700152400ustar00rootroot00000000000000/* The original code is public domain -- Will Hartung 4/9/09 */ /* Modifications, public domain as well, by Antti Haapala, 11/10/17 - Switched to getc on 5/23/19 */ #include "getline.h" ssize_t getline(char **lineptr, size_t *n, FILE *stream) { size_t pos; int c; if (lineptr == NULL || stream == NULL || n == NULL) { errno = EINVAL; return -1; } c = getc(stream); if (c == EOF) { return -1; } if (*lineptr == NULL) { *lineptr = malloc(128); if (*lineptr == NULL) { return -1; } *n = 128; } pos = 0; while(c != EOF) { if (pos + 1 >= *n) { size_t new_size = *n + (*n >> 2); if (new_size < 128) { new_size = 128; } char *new_ptr = realloc(*lineptr, new_size); if (new_ptr == NULL) { return -1; } *n = new_size; *lineptr = new_ptr; } ((unsigned char *)(*lineptr))[pos ++] = c; if (c == '\n') { break; } c = getc(stream); } (*lineptr)[pos] = '\0'; return pos; } /* int main(char** argv, int argn) { } */ megadepth-1.2.0/getline.h000066400000000000000000000004201420302544700152450ustar00rootroot00000000000000 #include #include #include #include #ifdef __cplusplus extern "C"{ #endif // if typedef doesn't exist (msvc, blah) typedef intptr_t ssize_t; ssize_t getline(char **lineptr, size_t *n, FILE *stream); #ifdef __cplusplus } #endif megadepth-1.2.0/htslib_ci/000077500000000000000000000000001420302544700154115ustar00rootroot00000000000000megadepth-1.2.0/junctions/000077500000000000000000000000001420302544700154655ustar00rootroot00000000000000megadepth-1.2.0/junctions/process_jx_output.sh000077500000000000000000000046501420302544700216300ustar00rootroot00000000000000#!/usr/bin/env bash set -x -o pipefail -o errexit -o nounset #this will produce a sorted, STAR-compatible formatted file of splice junction calls from BAM processed with Megadepth --all-junctions #unlike STAR: #1) 
splice junction strand is *not* determined (always 0 in column 4) #2) splice junction motifs are *not* determineda (always 0 in column 5) #both of these can be determined by extracting the dinucleotide motifs for the given reference coordinates for canonical motifs #input format: #read_name, chrom, start(1-based), end, strand_of_alignment (or actual strand if XS:A tag present), CIGAR, is_uniqe_alignment(1/0) jx_file=$1 #for faster sorting export LC_ALL=C #sort & format for compatibility unifier using STAR's SJ.out file format: #chromosome 1based_intron_start 1based_intron_end strand_0:undefined,1:+,2:- intron_motif:0:non-canonical;1:GT/AG,2:CT/AC,3:GC/AG,4:CT/GC,5:AT/AC,6:GT/AT 0:unannotated,1:annotated_in_spliceDB #uniquely_mapping_reads #multi_mapping_reads maximum_spliced_alignment_overhang #this determines maximum_spliced_alignment_overhang via STAR's method as described here (last post by Dobin): #https://groups.google.com/g/rna-star/c/XN0cWBxVFcM/m/ywcUg_s3CQAJ #for now, we leave blank the last column from STAR, which is the maximum of the min anchors for each junction, this requires more info than megadepth emits at this time (2021/01) sort -k2,2 -k3,3n -k4,4n -k1,1 -u $jx_file | cut -f 1,2-4,6,7 | perl -ne 'chomp; ($qname,$c,$s,$e,$cigar,$is_unique)=split(/\t/,$_); if($pc) { if($s == $ps && $e == $pe) { if($is_unique == 1) { $ucnt++; } else { $cnt++; } next; } else { print "$pc\t$ps\t$pe\t0\t0\t0\t$ucnt\t$cnt\t\n"; }} $ucnt=0; $cnt=0; if($is_unique == 1) { $ucnt=1; } else { $cnt=1; } $pc=$c; $ps=$s; $pe=$e; END { if($pc) { print "$pc\t$ps\t$pe\t0\t0\t0\t$ucnt\t$cnt\t\n"; }}' > ${jx_file}.sjout #new way, using XS:A strand determination if present, not yet ready #sort -k2,2 -k3,3n -k4,4n -k1,1 -u $jx_file | perl -ne 'chomp; ($qname,$c,$s,$e,$orient,$cigar,$is_unique)=split(/\t/,$_); if($pc) { if($s == $ps && $e == $pe && $o eq $po) { if($is_unique == 1) { $ucnt++; } else { $cnt++; } next; } else { print "$pc\t$ps\t$pe\t$po\t0\t0\t$ucnt\t$cnt\t\n"; }} $ucnt=0; 
$cnt=0; if($is_unique == 1) { $ucnt=1; } else { $cnt=1; } $pc=$c; $ps=$s; $pe=$e; $po=0; if($orient ne "0" && $orient ne "1") { $po=$orient eq "+"?1:2; } END { if($pc) { print "$pc\t$ps\t$pe\t$po\t0\t0\t$ucnt\t$cnt\t\n"; }}' > ${jx_file}.sjout megadepth-1.2.0/libBigWig_ci/000077500000000000000000000000001420302544700157635ustar00rootroot00000000000000megadepth-1.2.0/libdeflate_ci/000077500000000000000000000000001420302544700162175ustar00rootroot00000000000000megadepth-1.2.0/megadepth000077500000000000000000000002261420302544700153350ustar00rootroot00000000000000#!/usr/bin/env bash #get this script's path p=$(dirname $0) export LD_LIBRARY_PATH=$p/libBigWig:$p/htslib:$LD_LIBRARY_PATH $p/megadepth_release "$@" megadepth-1.2.0/megadepth.cpp000066400000000000000000004550661420302544700161330ustar00rootroot00000000000000/* The MIT License Copyright (c) 2018- by Christopher Wilks and Ben Langmead Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* .___ ___. _______ _______ ___ _______ _______ .______ .___________. 
__ __ | \/ | | ____| / _____| / \ | \ | ____|| _ \ | || | | | | \ / | | |__ | | __ / ^ \ | .--. || |__ | |_) | `---| |----`| |__| | | |\/| | | __| | | |_ | / /_\ \ | | | || __| | ___/ | | | __ | | | | | | |____ | |__| | / _____ \ | '--' || |____ | | | | | | | | |__| |__| |_______| \______| /__/ \__\ |_______/ |_______|| _| |__| |__| |__| */ #define __STDC_FORMAT_MACROS #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bigWig.h" #include "countlut.hpp" #ifdef WINDOWS_MINGW #include #include #include "getline.h" #include "mingw-std-threads/mingw.thread.h" template using hashmap = std::unordered_map; template using hashset = std::unordered_set; #else #include "robin_hood.h" template using hashmap = robin_hood::unordered_map; template using hashset = robin_hood::unordered_set; #endif #if defined(__AVX2__) || defined(__SSE2__) #include #endif #if defined(__GNUC__) || defined(__clang__) # ifndef unlikely # define unlikely(x) __builtin_expect(!!(x), 0) # endif # ifndef likely # define likely(x) __builtin_expect(!!(x), 1) # endif #endif int UNKNOWN_FORMAT=-1; int BAM_FORMAT = 1; int BW_FORMAT = 2; int CRAM_FORMAT = 3; //taken from HTSlib bgzip int BGZF_WRITE_WINDOW_SZ = 64 * 1024; //critical to use a high value here for remote BigWigs //accesses, has much less (maybe no) effect on local processing const uint32_t default_BW_READ_BUFFER = 1<<30; uint32_t BW_READ_BUFFER = default_BW_READ_BUFFER; bool SUMS_ONLY = false; bool SORTED_ANNOTATIONS = true; int COLLAPSED_ANNOTATION_MAX_DISTANCE = 2200; typedef std::vector strvec; typedef hashmap mate2len; typedef hashmap str2dblist; uint64_t MAX_INT = (2^63); //how many intervals to start with for a chromosome in a BigWig file //uint64_t STARTING_NUM_INTERVALS = 1000; uint64_t STARTING_NUM_INTERVALS = 1000000; //used for --annotation where we read a 3+ column BED file static const int CHRM_COL=0; static 
const int START_COL=1; static const int END_COL=2; //1MB per line should be more than enough for CIO static const int LINE_BUFFER_LENGTH=1048576; static const int BIGWIG_INIT_VAL = 17; static double SOFTCLIP_POLYA_TOTAL_COUNT_MIN=3; static double SOFTCLIP_POLYA_RATIO_MIN=0.8; //used for buffering up text/gz output static const int OUT_BUFF_SZ=4000000; static const int COORD_STR_LEN=34; enum Op { csum, cmean, cmin, cmax }; static const void print_version() { std::cout << "megadepth " << std::string(MEGADEPTH_VERSION) << std::endl; } struct Coordinate { char* chrm; int32_t start; int32_t end; }; static char emptystr[] = "\0"; static const char USAGE[] = "BAM and BigWig utility.\n" "\n" "Usage:\n" " megadepth [options]\n" "\n" "Options:\n" " -h --help Show this screen.\n" " --version Show version.\n" " --threads # of threads to do: BAM decompression OR compute sums over multiple BigWigs in parallel\n" " if the 2nd is intended then a TXT file listing the paths to the BigWigs to process in parallel\n" " should be passed in as the main input file instead of a single BigWig file (EXPERIMENTAL).\n" " --prefix String to use to prefix all output files.\n" " --no-auc-stdout Force all AUC(s) to be written to .auc.tsv rather than STDOUT\n" " --no-annotation-stdout Force summarized annotation regions to be written to .annotation.tsv rather than STDOUT\n" " --no-coverage-stdout Force covered regions to be written to .coverage.tsv rather than STDOUT\n" " --keep-order Output annotation coverage in the order chromosomes appear in the BAM/BigWig file\n" " The default is to output annotation coverage in the order chromosomes appear in the annotation BED file.\n" " This is only applicable if --annotation is used for either BAM or BigWig input.\n" "\n" "BigWig Input:\n" "Extract regions and their counts from a BigWig outputting BED format if a BigWig file is detected as input (exclusive of the other BAM modes):\n" " Extracts all reads from the passed in BigWig and output as BED 
format.\n" " This will also report the AUC over the annotated regions to STDOUT.\n" " If only the name of the BigWig file is passed in with no other args, it will *only* report total AUC to STDOUT.\n" " --annotation Only output the regions in this BED applying the argument to --op to them.\n" " --op Statistic to run on the intervals provided by --annotation\n" " --sums-only Discard coordinates from output of summarized regions\n" " --distance (2200[default]) Number of base pairs between end of last annotation and start of new to consider in the same BigWig query window (a form of binning) for performance. This determines the number of times the BigWig index is queried.\n" " --unsorted (off[default]) There's a performance improvement *if* BED file passed to --annotation is 1) sorted by sort -k1,1 -k2,2n (default is to assume sorted and check for unsorted positions, if unsorted positions are found, will fall back to slower version)\n" " --bwbuffer <1GB[default]> Size of buffer for reading BigWig files, critical to use a large value (~1GB) for remote BigWigs.\n" " Default setting should be fine for most uses, but raise if very slow on a remote BigWig.\n" "\n" "\n" "BAM Input:\n" "Extract basic junction information from the BAM, including co-occurrence\n" "If only the name of the BAM file is passed in with no other args, it will *only* report total AUC to STDOUT.\n" " --fasta Path to the reference FASTA file if a CRAM file is passed as the input file (ignored otherwise)\n" " If not passed, references will be downloaded using the CRAM header.\n" " --junctions Extract co-occurring jx coordinates, strand, and anchor length, per read\n" " writes to a TSV file .jxs.tsv\n" " --all-junctions Extract all jx coordinates, strand, and anchor length, per read for any jx\n" " writes to a TSV file .all_jxs.tsv\n" " --longreads Modifies certain buffer sizes to accommodate longer reads such as PB/Oxford.\n" " --filter-in Integer bitmask, any bits of which alignments need to have to be 
kept (similar to samtools view -f).\n" " --filter-out Integer bitmask, any bits of which alignments need to have to be skipped (similar to samtools view -F).\n" " --add-chr-prefix Adds \"chr\" prefix to relevant chromosomes for BAMs w/o it, pass \"human\" or \"mouse\".\n" " Only works for human/mouse references (default: off).\n" "\n" "Non-reference summaries:\n" " --alts Print differing from ref per-base coverages\n" " Writes to a CSV file .alts.tsv\n" " --include-softclip Print a record to the alts CSV for soft-clipped bases\n" " Writes total counts to a separate TSV file .softclip.tsv\n" " --only-polya If --include-softclip, only print softclips which are mostly A's or T's\n" " --include-n Print mismatch records when mismatched read base is N\n" " --print-qual Print quality values for mismatched bases\n" " --delta Print POS field as +/- delta from previous\n" " --require-mdz Quit with error unless MD:Z field exists everywhere it's\n" " expected\n" " --head Print sequence names and lengths in SAM/BAM header\n" "\n" "Coverage and quantification:\n" " --coverage Print per-base coverage (slow but totally worth it)\n" " --auc Print per-base area-under-coverage, will generate it for the genome\n" " and for the annotation if --annotation is also passed in\n" " Defaults to STDOUT, unless other params are passed in as well, then\n" " if writes to a TSV file .auc.tsv\n" " --bigwig Output coverage as BigWig file(s). Writes to .bw\n" " (also .unique.bw when --min-unique-qual is specified).\n" " Requires libBigWig.\n" " --annotation Path to BED file containing list of regions to sum coverage over\n" " (tab-delimited: chrm,start,end). 
Or this can specify a contiguous region size in bp.\n" " --op Statistic to run on the intervals provided by --annotation\n" " --no-index If using --annotation, skip the use of the BAM index (BAI) for pulling out regions.\n" " Setting this can be faster if doing windows across the whole genome.\n" " This will be turned on automatically if a window size is passed to --annotation.\n" " --min-unique-qual \n" " Output second bigWig consisting built only from alignments\n" " with at least this mapping quality. --bigwig must be specified.\n" " Also produces second set of annotation sums based on this coverage\n" " if --annotation is enabled\n" " --double-count Allow overlapping ends of PE read to count twice toward\n" " coverage\n" " --num-bases Report total sum of bases in alignments processed (that pass filters)\n" " --gzip Turns on gzipping of coverage output (no effect if --bigwig is passsed),\n" " this will also enable --no-coverage-stdout.\n" "\n" "Other outputs:\n" " --read-ends Print counts of read starts/ends, if --min-unique-qual is set\n" " then only the alignments that pass that filter will be counted here\n" " Writes to 2 TSV files: .starts.tsv, .ends.tsv\n" " --frag-dist Print fragment length distribution across the genome\n" " Writes to a TSV file .frags.tsv\n" " --echo-sam Print a SAM record for each aligned read\n" " --ends Report end coordinate for each read (useful for debugging)\n" " --test-polya Lower Poly-A filter minimums for testing (only useful for debugging/testing)\n" "\n"; int my_write(void* fh, char* buf, uint32_t buf_len) { #if USE_POSIX return ::write(::fileno(fh), buf, bu_len); #else return std::fwrite(buf, 1, buf_len, (FILE *)fh); #endif } int my_gzwrite(void* fh, char* buf, uint32_t buf_len) { return bgzf_write((BGZF*)fh, buf, buf_len); //return gzwrite(*((gzFile*) fh), buf, buf_len); } template int print_local(char* buf,const char* c, long start, long end, T val, double* local_vals, long z); template int print_local_sums_only(char* 
buf,const char* c, long start, long end, T val, double* local_vals, long z); template int print_shared(char* buf,const char* c, long start, long end, T val, double* local_vals, long z); template int print_shared_sums_only(char* buf,const char* c, long start, long end, T val, double* local_vals, long z); template <> int print_local(char* buf,const char* c, long start, long end, long val, double* local_vals, long z) { return sprintf(buf, "%s\t%lu\t%lu\t%lu\n", c, start, end, (long) local_vals[z]); } template <> int print_local_sums_only(char* buf,const char* c, long start, long end, long val, double* local_vals, long z) { return sprintf(buf, "%lu\n", (long) local_vals[z]); } template <> int print_shared(char* buf,const char* c, long start, long end, long val, double* local_vals, long z) { return sprintf(buf, "%s\t%lu\t%lu\t%lu\n", c, start, end, val); } template <> int print_shared_sums_only(char* buf,const char* c, long start, long end, long val, double* local_vals, long z) { return sprintf(buf, "%lu\n", val); } template <> int print_shared(char* buf, const char* c, long start, long end, double val, double* local_vals, long z) { //from https://stackoverflow.com/questions/994764/rounding-doubles-5-sprintf return sprintf(buf, "%s\t%lu\t%lu\t%.2f\n", c, (long) start, (long) end, (round(val*100.)/100.)); //return sprintf(buf, "%s\t%lu\t%lu\t%.2f\t%.11f\t%lu\n", c, (long) start, (long) end, (round(val*100.)/100.), val, (end-start)); } template <> int print_shared_sums_only(char* buf, const char* c, long start, long end, double val, double* local_vals, long z) { return sprintf(buf, "%.2f\n", (round(val*100.)/100.)); } template <> int print_local(char* buf, const char* c, long start, long end, double val, double* local_vals, long z) { return sprintf(buf, "%s\t%lu\t%lu\t%.2f\n", c, (long) start, (long) end, (round(local_vals[z]*100.)/100.)); } template <> int print_local_sums_only(char* buf, const char* c, long start, long end, double val, double* local_vals, long z) { 
return sprintf(buf, "%.2f\n", (round(local_vals[z]*100.)/100.)); } static const char* get_positional_n(const char ** begin, const char ** end, size_t n) { size_t i = 0; for(const char **itr = begin; itr != end; itr++) { if((*itr)[0] != '-' || strlen(*itr) == 1) { if(i++ == n) { return *itr; } } } return nullptr; } static bool has_option(const char** begin, const char** end, const std::string& option) { return std::find(begin, end, option) != end; } /** * Return the argument after the given one, (or further downstream when shift > 0). */ static const char** get_option( const char** begin, const char** end, const std::string& option, unsigned shift = 0) { const char** itr = std::find(begin, end, option); return itr + shift + 1; } /** * Holds an MDZ "operation" * op can be */ struct MdzOp { char op; int run; char str[1024]; }; //from https://github.com/samtools/htslib/blob/7c04ea5c328547e9e8a9af4b932b87a3cb1939e6/hts.c#L82 int A_idx = 1; int T_idx = 8; static inline int polya_check(const uint8_t *str, size_t off, size_t run, char *c) { char seq_nt16_str_counts[16] = {0}; for(size_t i = off; i < off + run; i++) seq_nt16_str_counts[bam_seqi(str, i)]++; int count = -1; if((seq_nt16_str_counts[A_idx] / (double) run) >= SOFTCLIP_POLYA_RATIO_MIN) { *c = 'A'; count = seq_nt16_str_counts[A_idx]; } else if((seq_nt16_str_counts[T_idx] / (double) run) >= SOFTCLIP_POLYA_RATIO_MIN) { *c = 'T'; count = seq_nt16_str_counts[T_idx]; } return count; } static const char seq_rev_nt16_str[] = "=TGMCRSVAWYHKDBN"; static inline std::ostream& seq_substring(std::ostream& os, const uint8_t *str, size_t off, size_t run, bool reverse=false) { if(reverse) { int i=(off+run)-1; while(((int) off) <= i) { int io = bam_seqi(str, i); os << seq_rev_nt16_str[io]; i--; } return os; } for(size_t i = off; i < off + run; i++) { os << seq_nt16_str[bam_seqi(str, i)]; } return os; } static inline char* seq_substring(const uint8_t *str, size_t off, size_t run, bool reverse=false) { char* seq = new char[off + run 
+ 1]; int k = 0; if(reverse) { int i=(off+run)-1; while(((int) off) <= i) { int io = bam_seqi(str, i); seq[k++] = seq_rev_nt16_str[io]; i--; } seq[k]='\0'; return seq; } for(size_t i = off; i < off + run; i++) { seq[k++] = seq_nt16_str[bam_seqi(str, i)]; } seq[k]='\0'; return seq; } static inline std::ostream& kstring_out(std::ostream& os, const kstring_t *str) { for(size_t i = 0; i < str->l; i++) { os << str->s[i]; } return os; } static inline std::ostream& cstr_substring(std::ostream& os, const uint8_t *str, size_t off, size_t run) { for(size_t i = off; i < off + run; i++) { os << (char)str[i]; } return os; } static inline char* cstr_substring(const uint8_t *str, size_t off, size_t run) { char* quals = new char[off + run]; int k = 0; for(size_t i = off; i < off + run; i++) quals[k++] = (char)str[i]; quals[k]='\0'; return quals; } static inline std::ostream& qstr_substring(std::ostream& os, const uint8_t *str, size_t off, size_t run, bool reverse=false) { if(reverse) { int i=(off+run)-1; while(((int) off) <= i) { os << (char)(str[i]+33); i--; } return os; } for(size_t i = off; i < off + run; i++) { os << (char)(str[i]+33); } return os; } /** * Parse given MD:Z extra field into a vector of MD:Z operations. 
*/ static void parse_mdz( const uint8_t *mdz, std::vector& ops) { int i = 0; size_t mdz_len = strlen((char *)mdz); while(i < mdz_len) { if(isdigit(mdz[i])) { int run = 0; while(i < mdz_len && isdigit(mdz[i])) { run *= 10; run += (int)(mdz[i] - '0'); i++; } if(run > 0) { ops.emplace_back(MdzOp{'=', run, ""}); ops.back().str[0] = '\0'; } } else if(isalpha(mdz[i])) { int st = i; while(i < mdz_len && isalpha(mdz[i])) i++; assert(i > st); ops.emplace_back(MdzOp{'X', i - st, ""}); for(int j = 0; j < i ; j++) { ops.back().str[j] = mdz[st + j]; } std::memcpy(ops.back().str, mdz + st, (size_t)(i - st)); ops.back().str[i - st] = '\0'; } else if(mdz[i] == '^') { i++; int st = i; while (i < mdz_len && isalpha(mdz[i])) i++; assert(i > st); ops.emplace_back(MdzOp{'^', i - st, ""}); std::memcpy(ops.back().str, mdz + st, (size_t)(i - st)); ops.back().str[i - st] = '\0'; } else { std::stringstream ss; ss << "Unknown MD:Z operation: \"" << mdz[i] << "\""; throw std::runtime_error(ss.str()); } } } static bool check_for_overlap(std::vector* overlapping_coords, int starting_idx, int32_t refpos) { for(auto it : *overlapping_coords) if(it.start <= refpos && it.end >= refpos) return true; return false; } struct CigarOp { char op; int32_t refidx; int32_t refpos; char* seq; char* quals; //std::ostream seq; //std::ostream quals; int32_t del_len; }; typedef hashmap> read2cigarops; //only applies to X,D, and I ops (not S [softclipping]) static void emit_alt_record(std::fstream& fout, CigarOp& cig, const char* qname) { fout << cig.refidx << ',' << cig.refpos << ',' << cig.op << ','; if(cig.op == 'D') fout << cig.del_len; else fout << cig.seq; //cleanup, assumes there's only 2 mates in a read delete cig.seq; fout << ',' << qname << ','; if(cig.quals) { fout << cig.quals; delete cig.quals; } fout << '\n'; } static void check_saved_ops(std::fstream& fout, std::vector* saved_ops, std::vector* overlapping_coords, char* real_qname, bool check_for_overlaps_flag = true) { int coord_idx = 0; for(auto it 
: *saved_ops) { char* qname = emptystr; if(check_for_overlaps_flag && check_for_overlap(overlapping_coords, coord_idx, it.refpos)) qname = real_qname; emit_alt_record(fout, it, qname); } } static bool output_from_cigar_mdz( const bam1_t *rec, std::vector& mdz, std::fstream& fout, uint64_t* total_softclip_count, char* real_qname, std::vector* overlapping_coords, std::vector* saved_ops = nullptr, bool save_ops = false, bool print_qual = false, bool include_sc = false, bool only_polya_sc = false, bool include_n_mms = false, bool delta = false) { //bool check_for_saved_ops = saved_ops->size() > 0; if(saved_ops->size() > 0) check_saved_ops(fout, saved_ops, overlapping_coords, real_qname); uint8_t *seq = bam_get_seq(rec); uint8_t *qual = bam_get_qual(rec); // If QUAL field is *. this array is just a bunch of 255s uint32_t *cigar = bam_get_cigar(rec); size_t mdzi = 0, seq_off = 0; int32_t ref_off = rec->core.pos; bool found = false; bool check_for_overlaps_flag = overlapping_coords->size() > 0; for(unsigned int k = 0; k < rec->core.n_cigar; k++) { int op = bam_cigar_op(cigar[k]); int run = bam_cigar_oplen(cigar[k]); if((strchr("DNMX=", BAM_CIGAR_STR[op]) != nullptr) && mdzi >= mdz.size()) { std::stringstream ss; ss << "Found read-consuming CIGAR op after MD:Z had been exhausted" << std::endl; throw std::runtime_error(ss.str()); } int coord_idx = 0; //TODO: track each I,D,X for a read if 1) first in a pair 2) possible overlap, otherwise just print if(op == BAM_CMATCH || op == BAM_CDIFF || op == BAM_CEQUAL) { // Look for block matches and mismatches in MD:Z string int runleft = run; while(runleft > 0 && mdzi < mdz.size()) { int run_comb = std::min(runleft, mdz[mdzi].run); runleft -= run_comb; assert(mdz[mdzi].op == 'X' || mdz[mdzi].op == '='); if(mdz[mdzi].op == '=') { // nop } else { assert(mdz[mdzi].op == 'X'); assert(strlen(mdz[mdzi].str) == run_comb); int cread = bam_seqi(seq, seq_off); if(!include_n_mms && run_comb == 1 && seq_nt16_str[cread] == 'N') { // skip } else { 
char* qname = emptystr; if(save_ops) { CigarOp cig; cig.refidx = rec->core.tid; cig.refpos = ref_off; cig.op = 'X'; cig.seq = seq_substring(seq, seq_off, (size_t)run_comb); cig.quals = nullptr; if(print_qual) cig.quals = cstr_substring(qual, seq_off, (size_t)run_comb); cig.del_len = 0; saved_ops->push_back(cig); } else { if(check_for_overlaps_flag && check_for_overlap(overlapping_coords, coord_idx, ref_off)) qname = real_qname; fout << rec->core.tid << ',' << ref_off << ",X,"; seq_substring(fout, seq, seq_off, (size_t)run_comb) << ',' << qname << ','; if(print_qual) cstr_substring(fout, qual, seq_off, (size_t)run_comb); fout << '\n'; found = true; } } } seq_off += run_comb; ref_off += run_comb; if(run_comb < mdz[mdzi].run) { assert(mdz[mdzi].op == '='); mdz[mdzi].run -= run_comb; } else { mdzi++; } } } else if(op == BAM_CINS) { char* qname = emptystr; if(save_ops) { CigarOp cig; cig.refidx = rec->core.tid; cig.refpos = ref_off; cig.op = 'I'; cig.seq = seq_substring(seq, seq_off, (size_t)run); cig.quals = nullptr; cig.del_len = 0; saved_ops->push_back(cig); } else { if(check_for_overlaps_flag && check_for_overlap(overlapping_coords, coord_idx, ref_off)) qname = real_qname; fout << rec->core.tid << ',' << ref_off << ",I,"; seq_substring(fout, seq, seq_off, (size_t)run) << ',' << qname << ",\n"; found = true; } seq_off += run; } else if(op == BAM_CSOFT_CLIP) { if(include_sc) { char direction = '+'; if(seq_off == 0) direction = '-'; (*total_softclip_count)+=run; if(only_polya_sc) { char c; int count_polya = polya_check(seq, seq_off, (size_t)run, &c); if(count_polya != -1 && run >= SOFTCLIP_POLYA_TOTAL_COUNT_MIN) { char* qname = emptystr; /*if(check_for_overlaps_flag && check_for_overlap(overlapping_coords, coord_idx, ref_off)) qname = real_qname;*/ fout << rec->core.tid << ',' << ref_off << ",S,"; fout << run << ',' << qname << ',' << direction << ',' << c << ',' << count_polya << '\n'; found = true; } } else { char* qname = emptystr; /*if(check_for_overlaps_flag && 
check_for_overlap(overlapping_coords, coord_idx, ref_off)) qname = real_qname;*/ fout << rec->core.tid << ',' << ref_off << ",S,"; seq_substring(fout, seq, seq_off, (size_t)run) << ',' << qname << ",\n"; found = true; } } seq_off += run; } else if (op == BAM_CDEL) { assert(mdz[mdzi].op == '^'); assert(run == mdz[mdzi].run); assert(strlen(mdz[mdzi].str) == run); mdzi++; char* qname = emptystr; if(save_ops) { CigarOp cig; cig.refidx = rec->core.tid; cig.refpos = ref_off; cig.op = 'D'; cig.seq = nullptr; cig.quals = nullptr; cig.del_len = run; saved_ops->push_back(cig); } else { if(check_for_overlaps_flag && check_for_overlap(overlapping_coords, coord_idx, ref_off)) qname = real_qname; fout << rec->core.tid << ',' << ref_off << ",D," << run << ',' << qname << ",\n"; found = true; } ref_off += run; } else if (op == BAM_CREF_SKIP) { ref_off += run; } else if (op == BAM_CHARD_CLIP) { } else if (op == BAM_CPAD) { } else { std::stringstream ss; ss << "No such CIGAR operation as \"" << op << "\""; throw std::runtime_error(ss.str()); } } assert(mdzi == mdz.size()); return found; } static bool output_from_cigar(const bam1_t *rec, std::fstream& fout, uint64_t* total_softclip_count, const bool include_sc, const bool only_polya_sc, char* real_qname, std::vector* overlapping_coords, std::vector* saved_ops = nullptr, bool save_ops = false) { if(saved_ops->size() > 0) check_saved_ops(fout, saved_ops, overlapping_coords, real_qname); uint8_t *seq = bam_get_seq(rec); uint32_t *cigar = bam_get_cigar(rec); uint32_t n_cigar = rec->core.n_cigar; bool found = false; if(n_cigar == 1) return found; int32_t refpos = rec->core.pos; int32_t seqpos = 0; int coord_idx = 0; bool check_for_overlaps_flag = overlapping_coords->size() > 0; for(uint32_t k = 0; k < n_cigar; k++) { int op = bam_cigar_op(cigar[k]); int run = bam_cigar_oplen(cigar[k]); switch(op) { case BAM_CDEL: { char* qname = emptystr; if(save_ops) { CigarOp cig; cig.refidx = rec->core.tid; cig.refpos = refpos; cig.op = 'D'; cig.seq = 
nullptr; cig.quals = nullptr; cig.del_len = run; saved_ops->push_back(cig); } else { if(check_for_overlaps_flag && check_for_overlap(overlapping_coords, coord_idx, refpos)) qname = real_qname; fout << rec->core.tid << ',' << refpos << ",D," << run << "," << qname << ",\n"; } refpos += run; break; } case BAM_CSOFT_CLIP: { if(include_sc) { char direction = '+'; if(seqpos == 0) direction = '-'; (*total_softclip_count) += run; if(only_polya_sc) { char c; int count_polya = polya_check(seq, (size_t)seqpos, (size_t)run, &c); if(count_polya != -1 && run >= SOFTCLIP_POLYA_TOTAL_COUNT_MIN) { char* qname = emptystr; fout << rec->core.tid << ',' << refpos << ',' << BAM_CIGAR_STR[op] << ','; fout << run << ',' << qname << ',' << direction << ',' << c << ',' << count_polya << '\n'; found = true; } } else { char* qname = emptystr; fout << rec->core.tid << ',' << refpos << ',' << BAM_CIGAR_STR[op] << ','; seq_substring(fout, seq, (size_t)seqpos, (size_t)run) << ',' << qname << ",\n"; found = true; } } seqpos += run; break; } case BAM_CINS: { char* qname = emptystr; if(save_ops) { CigarOp cig; cig.refidx = rec->core.tid; cig.refpos = refpos; cig.op = 'I'; cig.seq = seq_substring(seq, (size_t)seqpos, (size_t)run); cig.quals = nullptr; cig.del_len = 0; saved_ops->push_back(cig); } else { if(check_for_overlaps_flag && check_for_overlap(overlapping_coords, coord_idx, refpos)) qname = real_qname; fout << rec->core.tid << ',' << refpos << ',' << BAM_CIGAR_STR[op] << ','; seq_substring(fout, seq, (size_t)seqpos, (size_t)run) << ',' << qname << ",\n"; found = true; } seqpos += run; break; } case BAM_CREF_SKIP: { refpos += run; break; } case BAM_CMATCH: case BAM_CDIFF: case BAM_CEQUAL: { seqpos += run; refpos += run; break; } case BAM_CHARD_CLIP: case BAM_CPAD: { break; } default: { std::stringstream ss; //ss << "No such CIGAR operation as \"" << op << "\"" << n_cigar << " " << run << " " << k; ss << "No such CIGAR operation as \"" << op << "\""; throw std::runtime_error(ss.str()); } } } 
return found; } static void print_header(const bam_hdr_t * hdr) { for(int32_t i = 0; i < hdr->n_targets; i++) { std::cout << '@' << i << ',' << hdr->target_name[i] << ',' << hdr->target_len[i] << std::endl; } } static const long get_longest_target_size(const bam_hdr_t * hdr) { long max = 0; for(int32_t i = 0; i < hdr->n_targets; i++) { if(hdr->target_len[i] > max) max = hdr->target_len[i]; } return max; } static void reset_array(uint32_t* arr, const long arr_sz) { #if USE_SIMD_ZERO #if __AVX2__ __m256i zero = _mm256_setzero_si256(); static constexpr size_t nper = sizeof(__m256i) / sizeof(uint32_t); const size_t nsimd = arr_sz / nper; const size_t nsimd4 = (nsimd / 4) * 4; size_t i = 0; for(; i < nsimd4; i += 4) { _mm256_storeu_si256((__m256i *)(arr + nper * i), zero); _mm256_storeu_si256((__m256i *)(arr + nper * (i + 1)), zero); _mm256_storeu_si256((__m256i *)(arr + nper * (i + 2)), zero); _mm256_storeu_si256((__m256i *)(arr + nper * (i + 3)), zero); } for(;i < nsimd; ++i) { _mm256_storeu_si256((__m256i *)(arr + nper * i), zero); } for(i *= sizeof(__m256i) / sizeof(uint32_t); i < arr_sz; ++i) { arr[i] = 0; } #elif __SSE2__ __m128i zero = _mm_setzero_si128(); const size_t nsimd = arr_sz / 4; const size_t nsimd4 = (nsimd / 4) * 4; size_t i = 0; for(; i < nsimd4; i += 4) { _mm_storeu_si128((__m128i *)(arr + 4 * i), zero); _mm_storeu_si128((__m128i *)(arr + 4 * (i + 1)), zero); _mm_storeu_si128((__m128i *)(arr + 4 * (i + 2)), zero); _mm_storeu_si128((__m128i *)(arr + 4 * (i + 3)), zero); } for(;i < nsimd; ++i) { _mm_storeu_si128((__m128i *)(arr + 4 * i), zero); } for(i *= 4; i < arr_sz; ++i) { arr[i] = 0; } #endif #else std::memset(arr, 0, sizeof(uint32_t) * arr_sz); #endif } template static uint64_t print_array(const char* prefix, char* chrm, int32_t tid, const T2* arr, const long arr_sz, const bool skip_zeros, bigWigFile_t* bwfp, FILE* cov_fh, const bool dont_output_coverage = false, bool no_region=true, BGZF* gcov_fh = nullptr, hts_idx_t* cidx = nullptr, int* 
chrms_in_cidx = nullptr, FILE* wcov_fh=nullptr, BGZF* gwcov_fh=nullptr, int window_size=0, Op op = csum) { bool first = true; bool first_print = true; uint32_t running_value = 0; uint32_t last_pos = 0; uint64_t auc = 0; //from https://stackoverflow.com/questions/27401388/efficient-gzip-writing-with-gzprintf int chrnamelen = strlen(chrm); int total_line_len = chrnamelen + COORD_STR_LEN; int num_lines_per_buf = round(OUT_BUFF_SZ / total_line_len) - 3; int buf_written = 0; char* buf = nullptr; char* bufptr = nullptr; int (*printPtr) (void* fh, char* buf, uint32_t buf_len) = &my_write; void* cfh = nullptr; if(!bwfp) { buf = new char[OUT_BUFF_SZ]; bufptr = buf; cfh = cov_fh; //writing gzip if(!cov_fh) { printPtr = &my_gzwrite; cfh = gcov_fh; } } //might only want to print windowed coverage bool print_windowed_coverage = window_size > 0 && (gwcov_fh || wcov_fh); void* wcfh = nullptr; if(print_windowed_coverage) { wcfh = wcov_fh; //this assumes we're never going to have coverage and windowed coverage be different in terms of --gzip if(!wcov_fh) { printPtr = &my_gzwrite; wcfh = gwcov_fh; } } uint32_t buf_len = 0; int bytes_written = 0; char* startp = new char[32]; char* endp = new char[32]; char* valuep = new char[32]; float running_value_ = 0.0; uint32_t wcounter = 0; int64_t wsum = 0; char* wbuf = new char[1024]; int window_bytes_written = -1; uint32_t window_start = 0; //make sure we track this chromosome in whatever index we're building //if we may it this far, means the chromosome had some alignments if(chrms_in_cidx && chrms_in_cidx[tid+1] == 0) chrms_in_cidx[tid+1] = ++chrms_in_cidx[0]; for(uint32_t i = 0; i < arr_sz; i++) { if(first || (!no_region && running_value != arr[i]) || (no_region && arr[i] != 0)) { if(!first) { if(running_value > 0 || !skip_zeros) { //based on wiggletools' AUC calculation auc += (i - last_pos) * ((long) running_value); if(not dont_output_coverage) { if(bwfp && first_print) { running_value_ = static_cast(running_value); bwAddIntervals(bwfp, 
&chrm, &last_pos, &i, &running_value_, 1); } else if(bwfp) { running_value_ = static_cast(running_value); bwAppendIntervals(bwfp, &last_pos, &i, &running_value_, 1); } else { memcpy(bufptr, chrm, chrnamelen); char *oldbufptr = bufptr; bufptr += chrnamelen; *bufptr++='\t'; //idea from https://github.com/brentp/mosdepth/releases/tag/v0.2.9 uint32_t digits = u32toa_countlut(last_pos, bufptr, '\t'); bufptr+=digits+1; digits = u32toa_countlut(i, bufptr, '\t'); bufptr+=digits+1; digits = u32toa_countlut(running_value, bufptr, '\n'); bufptr+=digits+1; buf_len += (bufptr - oldbufptr); // Track bytes written using the distance bufptr has traveled bufptr[0]='\0'; (*printPtr)(cfh, buf, buf_len); if(cidx) { if(hts_idx_push(cidx, chrms_in_cidx[tid+1]-1, last_pos, i, bgzf_tell((BGZF*) cfh), 1) < 0) { fprintf(stderr,"error writing line in index at coordinates: %s:%u-%u, tid: %d idx tid: %d exiting\n",chrm,last_pos,i, tid, chrms_in_cidx[tid+1]-1); //TODO: change this to a return exit(-1); } } buf_written++; bufptr = buf; buf_written = 0; buf_len = 0; } first_print = false; } } } first = false; if(no_region) running_value += arr[i]; else running_value = arr[i]; last_pos = i; } if(print_windowed_coverage) { if(wcounter == window_size) { if(op == csum) window_bytes_written = sprintf(wbuf, "%s\t%u\t%u\t%ld\n", chrm, window_start, i, wsum); else if(op == cmean) { double wmean = (double)wsum / (double)window_size; //window_bytes_written = sprintf(wbuf, "%s\t%u\t%u\t%.2f\n", chrm, window_start, i, (round(wmean*100.)/100.)); window_bytes_written = sprintf(wbuf, "%s\t%u\t%u\t%.2f\n", chrm, window_start, i, wmean); //window_bytes_written = sprintf(wbuf, "%s\t%u\t%u\t%.2f\t%.11f\t%ld\n", chrm, window_start, i, (round(wmean*100.)/100.), wmean, wsum); } (*printPtr)(wcfh, wbuf, window_bytes_written); wsum = 0; wcounter = 0; window_start = i; } wsum += running_value; wcounter++; } } char last_line[1024]; if(!first) { if(running_value > 0 || !skip_zeros) { auc += (arr_sz - last_pos) * ((long) 
running_value); if(not dont_output_coverage) { if(bwfp) { running_value_ = static_cast(running_value); if(first_print) { bwAddIntervals(bwfp, &chrm, &last_pos, (uint32_t*) &arr_sz, &running_value_, 1); } else { bwAppendIntervals(bwfp, &last_pos, (uint32_t*) &arr_sz, &running_value_, 1); } } else { if(buf_written > 0) (*printPtr)(cfh, buf, buf_len); // This printing step could also be u32toa_countlut-ified buf_len = sprintf(last_line, "%s\t%u\t%lu\t%u\n", chrm, last_pos, arr_sz, running_value); (*printPtr)(cfh, last_line, buf_len); if(cidx) if(hts_idx_push(cidx, chrms_in_cidx[tid+1]-1, last_pos, arr_sz, bgzf_tell((BGZF*) cfh), 1) < 0) fprintf(stderr,"error writing last line of chromosome in index at coordinates: %s:%u-%ld, exiting\n",chrm,last_pos,arr_sz); } } } if(print_windowed_coverage) { if(op == csum) window_bytes_written = sprintf(wbuf, "%s\t%u\t%lu\t%ld\n", chrm, window_start, arr_sz, wsum); else if(op == cmean) { window_size = arr_sz - window_start; double wmean = (double)wsum / (double)window_size; window_bytes_written = sprintf(wbuf, "%s\t%u\t%lu\t%.2f\n", chrm, window_start, arr_sz, (round(wmean*100.)/100.)); //window_bytes_written = sprintf(wbuf, "%s\t%u\t%u\t%.2f\t%.11f\t%ld\t%ld\n", chrm, window_start, arr_sz, (round(wmean*100.)/100.), wmean, wsum, window_size); } (*printPtr)(wcfh, wbuf, window_bytes_written); } } return auc; } //generic function to loop through cigar //and for each operation/lenth, call a list of functions to process typedef std::vector args_list; typedef void (*callback)(const int, const int, args_list*); typedef std::vector callback_list; static void process_cigar(int n_cigar, const uint32_t *cigar, char** cigar_str, callback_list* callbacks, args_list* outlist) { int cx = 0; for (int k = 0; k < n_cigar; ++k) { const int cigar_op = bam_cigar_op(cigar[k]); const int len = bam_cigar_oplen(cigar[k]); char op_char[2]; op_char[0] = (char) bam_cigar_opchr(cigar[k]); op_char[1] = '\0'; cx += sprintf((*cigar_str)+cx, "%d%s", len, op_char); 
int i = 0; //now call each callback function for(auto const& func : *callbacks) { (*func)(cigar_op, len, (args_list*) (*outlist)[i]); i++; } } } //mostly cribbed from htslib/sam.c //calculates the mapped length of an alignment static void maplength(const int op, const int len, args_list* out) { int type = bam_cigar_type(op); if ((type & 1) && (type & 2)) *((uint64_t*) (*out)[0]) += len; } static void end_genomic_coord(const int op, const int len, args_list* out) { int type = bam_cigar_type(op); if (type & 2) *((uint64_t*) (*out)[0]) += len; } static const int32_t align_length(const bam1_t *rec) { //bam_endpos processes the whole cigar string return bam_endpos(rec) - rec->core.pos; } typedef hashmap str2cstr; typedef hashmap str2int; typedef std::vector coords; static void extract_junction(const int op, const int len, args_list* out) { uint32_t* base = (uint32_t*) (*out)[0]; //not an intron if(op != BAM_CREF_SKIP) { //but track the length if consuming the ref if(bam_cigar_type(op) & 2) (*base) += len; return; } coords* jxs = (coords*) (*out)[1]; jxs->push_back(*base); (*base) += len; jxs->push_back(*base); } static inline void decrement_coverages(uint32_t *coverages, uint32_t *unique_coverages, int start, int ninc, bool no_region=true) { coverages += start; unique_coverages += start; if(no_region) { int32_t* coverages_ = (int32_t*) coverages; int32_t* unique_coverages_ = (int32_t*) unique_coverages; coverages_[0]--; coverages_[ninc]++; unique_coverages_[0]--; unique_coverages_[ninc]++; return; } #if __AVX512F__ const size_t nper = sizeof(__m512) / sizeof(int32_t); size_t nsimd = ninc / nper; #elif __AVX2__ const size_t nper = sizeof(__m256) / sizeof(int32_t); size_t nsimd = ninc / nper; #elif __SSE2__ const size_t nper = sizeof(__m128) / sizeof(int32_t); size_t nsimd = ninc / nper; #endif int i = 0; #if __AVX512F__ #pragma GCC unroll 4 for(;i < nsimd; ++i) { auto s1 = _mm512_set1_epi32(-1); _mm512_storeu_si512((__m512i *)(coverages + i * nper), _mm512_add_epi32(s1, 
_mm512_loadu_si512((__m512i *)(coverages + i * nper)))); _mm512_storeu_si512((__m512i *)(unique_coverages + i * nper), _mm512_add_epi32(s1, _mm512_loadu_si512((__m512i *)(unique_coverages + i * nper)))); } i *= nper; #elif __AVX2__ #pragma GCC unroll 4 for(;i < nsimd; ++i) { auto s1 = _mm256_set1_epi32(-1); _mm256_storeu_si256((__m256i *)(coverages + i * nper), _mm256_add_epi32(s1, _mm256_loadu_si256((__m256i *)(coverages + i * nper)))); _mm256_storeu_si256((__m256i *)(unique_coverages + i * nper), _mm256_add_epi32(s1, _mm256_loadu_si256((__m256i *)(unique_coverages + i * nper)))); } i *= nper; #elif __SSE2__ #pragma GCC unroll 4 for(;i < nsimd; ++i) { auto s1 = _mm_set1_epi32(-1); _mm_storeu_si128((__m128i *)(coverages + i * nper), _mm_add_epi32(s1, _mm_loadu_si128((__m128i *)(coverages + i * nper)))); _mm_storeu_si128((__m128i *)(unique_coverages + i * nper), _mm_add_epi32(s1, _mm_loadu_si128((__m128i *)(unique_coverages + i * nper)))); } i *= nper; #endif for(; i < ninc; ++i) { --coverages[i]; --unique_coverages[i]; } } static inline void decrement_coverages(uint32_t *coverages, int ninc, bool no_region=true) { if(no_region) { int32_t* coverages_ = (int32_t*) coverages; coverages_[0]--; coverages_[ninc]++; return; } #if __AVX512F__ const size_t nper = sizeof(__m512) / sizeof(int32_t); size_t nsimd = ninc / nper; #elif __AVX2__ const size_t nper = sizeof(__m256) / sizeof(int32_t); size_t nsimd = ninc / nper; #elif __SSE2__ const size_t nper = sizeof(__m128) / sizeof(int32_t); size_t nsimd = ninc / nper; #endif int i = 0; #if __AVX512F__ #pragma GCC unroll 4 for(;i < nsimd; ++i) { auto s1 = _mm512_set1_epi32(-1); _mm512_storeu_si512((__m512i *)(coverages + i * nper), _mm512_add_epi32(s1, _mm512_loadu_si512((__m512i *)(coverages + i * nper)))); } i *= nper; #elif __AVX2__ #pragma GCC unroll 4 for(;i < nsimd; ++i) { auto s1 = _mm256_set1_epi32(-1); _mm256_storeu_si256((__m256i *)(coverages + i * nper), _mm256_add_epi32(s1, _mm256_loadu_si256((__m256i *)(coverages + 
i * nper)))); } i *= nper; #elif __SSE2__ #pragma GCC unroll 4 for(;i < nsimd; ++i) { auto s1 = _mm_set1_epi32(-1); _mm_storeu_si128((__m128i *)(coverages + i * nper), _mm_add_epi32(s1, _mm_loadu_si128((__m128i *)(coverages + i * nper)))); } i *= nper; #endif for(; i < ninc; --coverages[i++]); } static inline void increment_coverages(uint32_t *coverages, int ninc, bool no_region=true) { if(no_region) { int32_t* coverages_ = (int32_t*) coverages; coverages_[0]++; coverages_[ninc]--; return; } #if __AVX512F__ const size_t nper = sizeof(__m512) / sizeof(int32_t); size_t nsimd = ninc / nper; #elif __AVX2__ const size_t nper = sizeof(__m256) / sizeof(int32_t); size_t nsimd = ninc / nper; #elif __SSE2__ const size_t nper = sizeof(__m128) / sizeof(int32_t); size_t nsimd = ninc / nper; #endif int i = 0; #if __AVX512F__ for(;i < nsimd; ++i) { auto s1 = _mm512_set1_epi32(1); _mm512_storeu_si512((__m512i *)(coverages + i * nper), _mm512_add_epi32(s1, _mm512_loadu_si512((__m512i *)(coverages + i * nper)))); } i *= nper; #elif __AVX2__ for(;i < nsimd; ++i) { auto s1 = _mm256_set1_epi32(1); _mm256_storeu_si256((__m256i *)(coverages + i * nper), _mm256_add_epi32(s1, _mm256_loadu_si256((__m256i *)(coverages + i * nper)))); } i *= nper; #elif __SSE2__ for(;i < nsimd; ++i) { auto s1 = _mm_set1_epi32(1); _mm_storeu_si128((__m128i *)(coverages + i * nper), _mm_add_epi32(s1, _mm_loadu_si128((__m128i *)(coverages + i * nper)))); } i *= nper; #endif for(; i < ninc; ++coverages[i++]); } static inline void increment_coverages(uint32_t *coverages, uint32_t *unique_coverages, int start, int ninc, bool no_region=true) { coverages += start; unique_coverages += start; if(no_region) { int32_t* coverages_ = (int32_t*) coverages; int32_t* unique_coverages_ = (int32_t*) unique_coverages; coverages_[0]++; coverages_[ninc]--; unique_coverages_[0]++; unique_coverages_[ninc]--; return; } #if __AVX512F__ const size_t nper = sizeof(__m512) / sizeof(int32_t); size_t nsimd = ninc / nper; #elif __AVX2__ 
const size_t nper = sizeof(__m256) / sizeof(int32_t); size_t nsimd = ninc / nper; #elif __SSE2__ const size_t nper = sizeof(__m128) / sizeof(int32_t); size_t nsimd = ninc / nper; #endif int i = 0; #if __AVX512F__ for(;i < nsimd; ++i) { auto s1 = _mm512_set1_epi32(1); _mm512_storeu_si512((__m512i *)(coverages + i * nper), _mm512_add_epi32(s1, _mm512_loadu_si512((__m512i *)(coverages + i * nper)))); _mm512_storeu_si512((__m512i *)(unique_coverages + i * nper), _mm512_add_epi32(s1, _mm512_loadu_si512((__m512i *)(unique_coverages + i * nper)))); } i *= nper; #elif __AVX2__ for(;i < nsimd; ++i) { auto s1 = _mm256_set1_epi32(1); _mm256_storeu_si256((__m256i *)(coverages + i * nper), _mm256_add_epi32(s1, _mm256_loadu_si256((__m256i *)(coverages + i * nper)))); _mm256_storeu_si256((__m256i *)(unique_coverages + i * nper), _mm256_add_epi32(s1, _mm256_loadu_si256((__m256i *)(unique_coverages + i * nper)))); } i *= nper; #elif __SSE2__ for(;i < nsimd; ++i) { auto s1 = _mm_set1_epi32(1); _mm_storeu_si128((__m128i *)(coverages + i * nper), _mm_add_epi32(s1, _mm_loadu_si128((__m128i *)(coverages + i * nper)))); _mm_storeu_si128((__m128i *)(unique_coverages + i * nper), _mm_add_epi32(s1, _mm_loadu_si128((__m128i *)(unique_coverages + i * nper)))); } i *= nper; #endif for(; i < ninc; ++i) { ++coverages[i]; ++unique_coverages[i]; } } static uint64_t num_overlapping_pairs = 0; //static uint32_t num_opairs[10024]; struct MateInfo { bool passing_qual; std::string qname; //char* qname; int32_t mrefpos; uint32_t n_cigar; uint32_t* cigar; bool erased; }; //typedef hashmap read2len; //typedef hashmap read2len; typedef hashmap*> read2len; typedef hashmap> read2overlaps; static const int32_t calculate_coverage(const bam1_t *rec, uint32_t* coverages, uint32_t* unique_coverages, const bool double_count, const int min_qual, read2len* overlapping_mates, int32_t* total_intron_length, read2overlaps* overlap_coords, bool no_region=true) { int32_t refpos = rec->core.pos; int32_t mrefpos = 
rec->core.mpos; int32_t refpos_to_hash = mrefpos; //lifted from htslib's bam_cigar2rlen(...) & bam_endpos(...) int32_t algn_end_pos = refpos; const uint32_t* cigar = bam_get_cigar(rec); int k, z; //check for overlapping mate and corect double counting if exists char* qname = bam_get_qname(rec); bool unique = min_qual > 0; bool passing_qual = rec->core.qual >= min_qual; //fix paired mate overlap double counting //fix overlapping mate pair, only if 1) 2nd mate and //2) we're either not unique, or we're higher than the required quality int32_t mendpos = 0; int n_mspans = 0; std::unique_ptr mspans; //std::unique_ptr mspans_which_overlap; int mspans_idx = 0; int mspans_which_overlap_idx = 0; read2overlaps::iterator overlapping_coords_it; if(overlap_coords) overlapping_coords_it = overlap_coords->begin(); const std::string tn(qname); int32_t end_pos = bam_endpos(rec); uint32_t mate_passes_quality = 0; //-----First Mate Check //if we're the first mate and //we're avoiding double counting and we're a proper pair //and we overlap with our mate, then store our cigar + length //for the later mate to adjust its coverage appropriately if(coverages && !double_count && (rec->core.flag & BAM_FPROPER_PAIR) == 2) { bool possible_overlap = rec->core.tid == rec->core.mtid && end_pos > mrefpos; bool first_mate_w_overlap = possible_overlap && refpos <= mrefpos; bool second_mate = possible_overlap && refpos >= mrefpos; if(second_mate) refpos_to_hash = refpos; //1) we're on the same chrm as our mate AND //2) we're either the first mate overlapping with the 2nd, or we're the 2nd mate //so we could have mate overlap if(first_mate_w_overlap || second_mate) { std::vector* mate_vec = nullptr; MateInfo* mate_info = nullptr; auto mit = overlapping_mates->find(refpos_to_hash); bool potential_mate_found = mit != overlapping_mates->end(); //if we found a potential mate in the hash based on pos int mvi = 0; if(potential_mate_found) { mate_vec = mit->second; for(auto mate : *mate_vec) { 
//fprintf(stderr,"name check for refpos %u mrefpos %u: %s vs. %s\n",refpos, mrefpos, tn.c_str(), mate->qname); if(!mate->erased && mate->qname == tn) { mate_info = mate; break; } mvi++; } } //first mate in the pair if(first_mate_w_overlap && !mate_info) { const uint32_t* mcigar = bam_get_cigar(rec); uint32_t n_cigar = rec->core.n_cigar; mate_info = new MateInfo; mate_info->passing_qual = unique && passing_qual; mate_info->qname = tn; mate_info->mrefpos = refpos; mate_info->n_cigar = n_cigar; mate_info->cigar = new uint32_t[n_cigar]; std::memcpy(mate_info->cigar, mcigar, 4*n_cigar); mate_info->erased = false; //if we didn't find a previous vector, create one if(!potential_mate_found) { mate_vec = new std::vector; overlapping_mates->emplace(mrefpos, mate_vec); } mate_vec->push_back(mate_info); num_overlapping_pairs++; } //-------Second Mate Check else if(second_mate && mate_info) { //setup for tracking actual overlapping segments for alt base output if(overlap_coords) overlapping_coords_it = overlap_coords->emplace(tn, std::vector()).first; uint32_t mn_cigar = mate_info->n_cigar; mate_passes_quality = mate_info->passing_qual; uint32_t* mcigar = mate_info->cigar; int32_t real_mate_pos = mate_info->mrefpos; int32_t malgn_end_pos = real_mate_pos; //bash cigar to get spans of overlap mspans.reset(new int32_t[mn_cigar * 2]); //if(overlap_coords) //mspans_which_overlap.reset(new int32_t[(mn_cigar * 2) + 1]); for (k = 0; k < mn_cigar; ++k) { const int cigar_op = bam_cigar_op(mcigar[k]); if(bam_cigar_type(cigar_op)&2) { const int32_t len = bam_cigar_oplen(mcigar[k]); if(bam_cigar_type(cigar_op)&1) { mspans[mspans_idx * 2] = malgn_end_pos; mspans[mspans_idx * 2 + 1] = malgn_end_pos + len; mspans_idx++; } malgn_end_pos += len; } } delete[] mcigar; mate_info->erased = true; //overlapping_mates->erase(mit); delete mate_info; mate_vec->erase(mate_vec->begin()+mvi); if(mate_vec->size() == 0) { //mate_vec->shrink_to_fit(); //std::vector().swap(*mate_vec); //fprintf(stderr, "erasing 
vector\n"); //delete mate_vec; delete mate_vec; overlapping_mates->erase(mit); } n_mspans = mspans_idx; mendpos = malgn_end_pos; } } } mspans_idx = 0; if(unique && passing_qual) { int32_t lastref = 0; for (k = 0; k < rec->core.n_cigar; ++k) { const int cigar_op = bam_cigar_op(cigar[k]); //do we consume ref? if(bam_cigar_type(cigar_op)&2) { const int32_t len = bam_cigar_oplen(cigar[k]); if(cigar_op == BAM_CREF_SKIP) (*total_intron_length) = (*total_intron_length) + len; //are we calc coverages && do we consume query? if(coverages && bam_cigar_type(cigar_op)&1) { increment_coverages(coverages, unique_coverages, algn_end_pos, len, no_region); //now fixup overlapping segment but only if mate passed quality if(n_mspans > 0 && algn_end_pos < mendpos) { //loop until we find the next overlapping span //if are current segment is too early we just keep the span index where it is while(mspans_idx < n_mspans && algn_end_pos >= mspans[mspans_idx * 2 + 1]) mspans_idx++; int32_t cur_end = algn_end_pos + len; int32_t left_end = algn_end_pos; if(left_end < mspans[mspans_idx * 2]) left_end = mspans[mspans_idx * 2]; //check 1) we've still got mate spans 2) current segment overlaps the current mate span while(mspans_idx < n_mspans && left_end < mspans[mspans_idx * 2 + 1] && cur_end > mspans[mspans_idx * 2]) { //set right end of segment to decrement int32_t right_end = cur_end; int32_t next_left_end = left_end; if(right_end >= mspans[mspans_idx * 2 + 1]) { right_end = mspans[mspans_idx * 2 + 1]; //if our segment is greater than the previous mate's //also increment the mate spans index mspans_idx++; if(mspans_idx < n_mspans) next_left_end = mspans[mspans_idx * 2]; } else { next_left_end = mspans[mspans_idx * 2 + 1]; } decrement_coverages(coverages + left_end, right_end - left_end, no_region); if(overlap_coords) { int32_t ostart = left_end; int32_t oend = (ostart + (right_end - left_end)) - 1; Coordinate coord; coord.start = ostart; coord.end = oend; 
overlapping_coords_it->second.push_back(coord); //base-1 coords for overlapping segment //mspans_which_overlap[mspans_which_overlap_idx * 2] = ostart; //mspans_which_overlap[mspans_which_overlap_idx * 2 + 1] = (ostart + (right_end - left_end)) - 1; //mspans_which_overlap_idx++; } if(mate_passes_quality) decrement_coverages(unique_coverages + left_end, right_end - left_end, no_region); left_end = next_left_end; } } } algn_end_pos += len; } } } else { for (k = 0; k < rec->core.n_cigar; ++k) { const int cigar_op = bam_cigar_op(cigar[k]); //do we consume ref? if(bam_cigar_type(cigar_op)&2) { const int32_t len = bam_cigar_oplen(cigar[k]); if(cigar_op == BAM_CREF_SKIP) (*total_intron_length) = (*total_intron_length) + len; //are we calc coverages && do we consume query? if(coverages && bam_cigar_type(cigar_op)&1) { increment_coverages(&coverages[algn_end_pos], len, no_region); //now fixup overlapping segment if(n_mspans > 0 && algn_end_pos < mendpos) { //loop until we find the next overlapping span //if are current segment is too early we just keep the span index where it is while(mspans_idx < n_mspans && algn_end_pos >= mspans[mspans_idx * 2 + 1]) mspans_idx++; int32_t cur_end = algn_end_pos + len; int32_t left_end = algn_end_pos; if(left_end < mspans[mspans_idx * 2]) left_end = mspans[mspans_idx * 2]; //check 1) we've still got mate spans 2) current segment overlaps the current mate span while(mspans_idx < n_mspans && left_end < mspans[mspans_idx * 2 + 1] && cur_end > mspans[mspans_idx * 2]) { //set right end of segment to decrement int32_t right_end = cur_end; int32_t next_left_end = left_end; if(right_end >= mspans[mspans_idx * 2 + 1]) { right_end = mspans[mspans_idx * 2 + 1]; //if our segment is greater than the previous mate's //also increment the mate spans index //delete[] mspans[mspans_idx]; mspans_idx++; if(mspans_idx < n_mspans) next_left_end = mspans[mspans_idx * 2]; } else { next_left_end = mspans[mspans_idx * 2 + 1]; } 
decrement_coverages(&coverages[left_end], right_end - left_end, no_region); left_end = next_left_end; } } } algn_end_pos += len; } } } return algn_end_pos; } template using annotation_map_t = hashmap>; typedef std::vector strlist; //about 3x faster than the sstring/string::getline version template static const int process_region_line(char* line, const char* delim, annotation_map_t* amap, strlist* chrm_order, bool keep_order, annotation_map_t* acmap, str2int* chrms_done, char** ppchrm, long* ppstart, char** pchrm, long* pstart, long* pend) { char* tok = strtok(line, delim); int i = 0; char* chrm = nullptr; long start = -1; long end = -1; int ret = 0; int last_col = END_COL; while(tok != nullptr) { if(i > last_col) break; if(i == CHRM_COL) chrm = strdup(tok); else if(i == START_COL) start = atol(tok); else if(i == END_COL) end = atol(tok); i++; tok = strtok(nullptr, delim); } //if we need to keep the order, then we'll store values here const int alen = keep_order?4:2; T* coords = new T[alen]; coords[0] = start; coords[1] = end; std::fill(coords + 2, coords + alen, 0); //check that the annotation chromosomes are contiguous //basically the same as the check htslib/tabix does when indexing to ensure sorted contiguous chromosomes if(SORTED_ANNOTATIONS && *ppchrm && strcmp(*ppchrm, chrm) != 0) { auto it = chrms_done->find(chrm); if(it != chrms_done->end()) { //fprintf(stderr,"annotation BED file contains out of order chromosome(s): %s, terminating early\n",chrm); //return -1; fprintf(stderr,"annotation BED file contains out of order chromosomes(s): %s\t%ld\t%ld\n, falling back to slower BigWig matching (doesn't affect BAM processing)\nFor potentially faster performance in BigWig reading, please sort your argument to --annotations (BED) file via sort -k1,1 -k2,2n and re-run megadepth\n",chrm,start,end); SORTED_ANNOTATIONS = false; } else { chrms_done->emplace(*ppchrm, 1); *ppstart = -1; } } *ppchrm = chrm; auto it = amap->find(chrm); if(it == amap->end()) { 
chrm_order->push_back(chrm); it = amap->emplace(chrm, std::vector()).first; *ppstart = -1; } it->second.push_back(coords); //check for unsorted BED file, if unsorted, fall back to slower version: //don't use collapsed annotations to reduce index calls (i.e. acmap) //basically the same as the check htslib/tabix does when indexing to ensure sorted positions within a chromosome if(SORTED_ANNOTATIONS && start < *ppstart) { fprintf(stderr,"unsorted interval: %s\t%ld\t%ld\n, falling back to slower matching (doesn't affect BAM processing)\nFor potentially faster BigWig reading performance, please sort your argument to --annotations (BED) file via sort -k1,1 -k2,2n and re-run megadepth.\n",chrm,start,end); SORTED_ANNOTATIONS = false; } *ppstart = start; if(SORTED_ANNOTATIONS && acmap) { bool save_previous_coords = false; bool update_coords = true; if(*pchrm) { save_previous_coords = true; if(strcmp(chrm,*pchrm) == 0 && start - *pend < COLLAPSED_ANNOTATION_MAX_DISTANCE && end >= *pend) { *pend = end; update_coords = false; save_previous_coords = false; } } if(save_previous_coords) { long* coords0 = new long[2]; coords0[0] = *pstart; coords0[1] = *pend; auto it = acmap->find(*pchrm); if(it == acmap->end()) { it = acmap->emplace(*pchrm, std::vector()).first; } it->second.push_back(coords0); } if(update_coords) { *pchrm = chrm; *pstart = start; *pend = end; } } return ret; } template static const int read_annotation(FILE* fin, annotation_map_t* amap, strlist* chrm_order, bool keep_order, uint64_t* num_annotations, annotation_map_t* acmap) { //track chromosomes to detect out of order annotation file str2int chrms_done; char *ppchrm = nullptr; //track the previous start position for checking for unsorted annotation input long ppstart = -1; //track collapsed annotation interval to reduce calls to BW's R-index if sorted annotation input long pstart = -1; long pend = -1; char *pchrm = nullptr; char *line = (char *)std::malloc(LINE_BUFFER_LENGTH); size_t length = LINE_BUFFER_LENGTH; 
assert(fin); ssize_t bytes_read = getline(&line, &length, fin); int err = 0; while(bytes_read != -1) { err = process_region_line(line, "\t", amap, chrm_order, keep_order, acmap, &chrms_done, &ppchrm, &ppstart, &pchrm, &pstart, &pend); if(err) { std::cerr << "Error: " << err << " in process_region_line.\n"; break; } assert(err==0); (*num_annotations)++; bytes_read = getline(&line, &length, fin); } if(SORTED_ANNOTATIONS && acmap && pchrm) { long* coords0 = new long[2]; coords0[0] = pstart; coords0[1] = pend; auto it = acmap->find(pchrm); if(it == acmap->end()) { it = acmap->emplace(pchrm, std::vector()).first; } it->second.push_back(coords0); } std::free(line); std::cerr << "building whole annotation region map done\n"; return err; } typedef hashmap str2op; template static void sum_annotations(const uint32_t* coverages, const std::vector& annotations, const long chr_size, const char* chrm, FILE* ofp, uint64_t* annotated_auc, Op op, bool just_auc = false, int keep_order_idx = -1) { unsigned long z, j; int (*printPtr) (char* buf, const char*, long, long, T, double*, long) = &print_shared; int (*outputFunc)(void* fh, char* buf, uint32_t buf_len) = &my_write; if(SUMS_ONLY) printPtr = &print_shared_sums_only; char* buf = new char[1024]; for(z = 0; z < annotations.size(); z++) { T sum = 0; T start = annotations[z][0]; T end = annotations[z][1]; T local_sum = 0; for(j = start; j < end; j++) { assert(j < chr_size); local_sum += coverages[j]; } sum += local_sum; (*annotated_auc) = (*annotated_auc) + sum; if(!just_auc) { if(op == cmean) sum = (double)local_sum / ((double)(end-start)); if(keep_order_idx == -1) { int buf_len = (*printPtr)(buf, chrm, (long) start, (long) end, sum, nullptr, 0); (*outputFunc)(ofp, buf, buf_len); } else annotations[z][keep_order_idx] = sum; } } } static bigWigFile_t* create_bigwig_file(const bam_hdr_t *hdr, const char* out_fn, const char *suffix) { if(bwInit(BW_READ_BUFFER) != 0) { fprintf(stderr, "Failed when calling bwInit with %d init val\n", 
BIGWIG_INIT_VAL); return nullptr; } char fn[1024] = ""; sprintf(fn, "%s.%s", out_fn, suffix); bigWigFile_t* bwfp = bwOpen(fn, nullptr, "w"); if(!bwfp) { fprintf(stderr, "Failed when attempting to open BigWig file %s for writing\n", fn); return nullptr; } //create with up to 10 zoom levels (though probably less in practice) bwCreateHdr(bwfp, 10); bwfp->cl = bwCreateChromList(hdr->target_name, hdr->target_len, hdr->n_targets); bwWriteHdr(bwfp); return bwfp; } int KALLISTO_MAX_FRAG_LENGTH = 1000; typedef hashmap fraglen2count; static void print_frag_distribution(const fraglen2count* frag_dist, FILE* outfn) { double mean = 0.0; uint64_t count = 0; //track a Kallisto-comparable version separately double kmean = 0.0; uint64_t kcount = 0; uint64_t mode = 0; uint64_t mode_count = 0; for(auto kv: *frag_dist) { fprintf(outfn, "%d\t%u\n", kv.first, kv.second); count += kv.second; mean += (kv.first*kv.second); if(kv.first < KALLISTO_MAX_FRAG_LENGTH) { kcount += kv.second; kmean += (kv.first*kv.second); } if(kv.second > mode_count) { mode_count = kv.second; mode = kv.first; } } mean /= count; kmean /= kcount; fprintf(outfn, "STAT\tCOUNT\t%" PRIu64 "\n", count); fprintf(outfn, "STAT\tMEAN_LENGTH\t%.3f\n", mean); fprintf(outfn, "STAT\tMODE_LENGTH\t%" PRIu64 "\n", mode); fprintf(outfn, "STAT\tMODE_LENGTH_COUNT\t%" PRIu64 "\n", mode_count); fprintf(outfn, "STAT\tKALLISTO_COUNT\t%" PRIu64 "\n", kcount); fprintf(outfn, "STAT\tKALLISTO_MEAN_LENGTH\t%.3f\n", kmean); } void output_read_sequence_and_qualities(char* qname, int midx, uint8_t* seq, uint8_t* qual, size_t l_qseq, bool reversed, std::ostream* outfh, bool one_file) { (*outfh) << "@" << qname; if(!one_file) (*outfh) << "/" << midx; (*outfh) << "\n"; seq_substring(*outfh, seq, 0, l_qseq, reversed); (*outfh) << "\n+\n"; qstr_substring(*outfh, qual, 0, l_qseq, reversed); (*outfh) << "\n"; } static int process_bigwig_for_total_auc(const char* fn, double* all_auc, FILE* errfp = stderr) { //in part lifted from 
https://github.com/dpryan79/libBigWig/blob/master/test/testIterator.c //this is the buffer if(bwInit(BW_READ_BUFFER) != 0) { fprintf(errfp, "Error in bwInit, exiting\n"); return -1; } bigWigFile_t *fp = bwOpen((char *)fn, NULL, "r"); if(!fp) { fprintf(errfp, "Error in opening %s as BigWig file, exiting\n", fn); return -1; } fprintf(stdout,"opened %s, BW read buffer is %u\n",fn, BW_READ_BUFFER); fflush(stdout); uint32_t i, tid, blocksPerIteration; //better to ask for a few blocks for better memory and time stats blocksPerIteration = 10; bwOverlapIterator_t *iter = nullptr; uint64_t total_num_intervals = 0; //loop through all the chromosomes in the BW for(tid = 0; tid < fp->cl->nKeys; tid++) { if(fp->cl->len[tid] < 1) continue; iter = bwOverlappingIntervalsIterator(fp, fp->cl->chrom[tid], 0, fp->cl->len[tid], blocksPerIteration); if(!iter->data) { fprintf(errfp, "WARNING: no intervals for chromosome %s in %s as BigWig file, skipping\n", fp->cl->chrom[tid], fn); goto next; continue; } while(iter->data) { uint32_t num_intervals = iter->intervals->l; total_num_intervals+=num_intervals; uint32_t istart = 0; uint32_t iend = 0; for(int j = 0; j < num_intervals; j++) { istart = iter->intervals->start[j]; iend = iter->intervals->end[j]; double value = (iend-istart) * iter->intervals->value[j]; (*all_auc) += value; } iter = bwIteratorNext(iter); } next: // To ensure that we are destroying for cases where no intervals are available (1115) // Could replace with RAII, but this is simpler and fits the style better bwIteratorDestroy(iter); } bwClose(fp); bwCleanup(); return 0; } using chr2bool = hashset; template static int process_bigwig(const strlist* chrm_order, const char* fn, double* annotated_auc, annotation_map_t* amap, chr2bool* annotation_chrs_seen, FILE* afp, int keep_order_idx = -1, Op op = csum, FILE* errfp = stderr, str2dblist* store_local=nullptr, annotation_map_t* acmap = nullptr) { //in part lifted from 
https://github.com/dpryan79/libBigWig/blob/master/test/testIterator.c if(bwInit(BW_READ_BUFFER) != 0) { fprintf(errfp, "Error in bwInit, exiting\n"); return -1; } bigWigFile_t *fp = bwOpen((char *)fn, NULL, "r"); if(!fp) { fprintf(errfp, "Error in opening %s as BigWig file, exiting\n", fn); return -1; } int (*printPtr) (char* buf, const char*, long, long, T, double*, long) = &print_shared; int (*outputFunc)(void* fh, char* buf, uint32_t buf_len) = &my_write; if(SUMS_ONLY) printPtr = &print_shared_sums_only; char* buf = new char[1024]; uint32_t tid, blocksPerIteration; bwOverlappingIntervals_t *intervals = nullptr; long num_annotations_processed = 0; std::vector* collapsed = nullptr; long collapsed_idx = 0; long collapsed_size = -1; long* collapsed_coords = nullptr; //chrm_order matches what's loaded into amap (same order) for(auto const chrom : *chrm_order) { if(!chrom) continue; std::vector& annotations = (*amap)[chrom]; if(acmap && SORTED_ANNOTATIONS) { //for a new chromosome, set all collapsed variables back to their initialized state collapsed_idx = 0; collapsed = &((*acmap)[chrom]); collapsed_size = collapsed->size(); if(intervals) bwDestroyOverlappingIntervals(intervals); intervals = nullptr; } uint32_t istart = -1; uint32_t iend = -1; long z, j, k; long asz = annotations.size(); double* local_vals; //loop through annotation intervals as outer loop for(z = 0; z < asz; z++) { const auto &az = annotations[z]; double sum = 0; double min = MAX_INT; double max = 0; T start = az[0]; T ostart = start; T end = az[1]; //1st check to see if we're still within the collapsed interval //if the BED file is 1) sorted and 2) we're using collapsed intervals to speed up if(SORTED_ANNOTATIONS && acmap) { //1st: get current collapsed interval collapsed_coords = (*collapsed)[collapsed_idx]; //2nd: check to see if our current annotation interval with contained within the collapased interval while(!(start >= collapsed_coords[0] && end <= collapsed_coords[1])) { //2nd A: only do 
this if we need to move collapsed intervals on the same chromosome if(intervals) { bwDestroyOverlappingIntervals(intervals); intervals = nullptr; } collapsed_idx++; if(collapsed_idx >= collapsed_size) { fprintf(stderr,"ERROR ran out of collapsed intervals, this shouldn't happen, terminating early!\n"); return -1; } //2nd B: updated collapsed interval for next check collapsed_coords = (*collapsed)[collapsed_idx]; } //3rd: check to see if we need to do the interval query again, based on: //if 1) a new chromosome OR 2) a new collapsed interval if(!intervals) intervals = bwGetOverlappingIntervals(fp, chrom, collapsed_coords[0], collapsed_coords[1]); } //slower way but works for any and all BED files (including unsorted ones) else intervals = bwGetOverlappingIntervals(fp, chrom, start, end); //now loop through the intervals for this annotation if(intervals && intervals->l > 0) { uint32_t num_intervals = intervals->l; //BigWigs don't support overlapping intervals according to: //https://github.com/deeptools/pyBigWig/issues/93 //so we're free to do incremental matching across an annotation interval for(j = 0; j < num_intervals; j++) { istart = intervals->start[j]; iend = intervals->end[j]; //this annotation interval is too early, skip the whole rest of the set from the BigWig //since these are BED starts, even if it overlaps the start coordinate it still doesn't overlap if(end <= istart) break; //is our start and/or end overlapping? if((start >= istart && start < iend) || (end > istart && end <= iend) || (start < istart && end > iend)) { double first_k = start < istart ? istart : start; double last_k = end > iend ? iend : end; //stat mode //avoid having if's in the inner loops as much as possible switch(op) { case csum: case cmean: sum += (intervals->value[j]*(last_k - first_k)); break; case cmin: min = intervals->value[j] < min ? intervals->value[j]:min; break; case cmax: max = intervals->value[j] > max ? 
intervals->value[j]:max; break; } //fprintf(errfp, "MATCHING\t%s\t%d\t%d\t%.0f\t%d\t%d\t%.0f\t%0.f\t%0.f\n", chrom, istart, iend, iter->intervals->value[j],first_k,last_k,sum,start,end); //move start up if(last_k < end) start = last_k; //break out if we've hit the end of this annotation interval if(last_k >= end) break; } } } if(op == csum) (*annotated_auc) += sum; //0-based start double annot_length = end - ostart; T value = sum; switch(op) { case cmean: value = (double)sum / (double)annot_length; break; case cmin: value = min; break; case cmax: value = max; break; case csum:; // do nothing } //not trying to keep the order in the BED file, just print them as we find them if(keep_order_idx == -1) { int buf_len = (*printPtr)(buf, fp->cl->chrom[tid], (long) ostart, (long) end, value, nullptr, 0); (*outputFunc)(afp, buf, buf_len); } else if(store_local) local_vals[z] = value; else az[keep_order_idx] = value; num_annotations_processed++; /*if(num_annotations_processed % 1000 == 0) fprintf(stderr,"processed %u annotations\n",num_annotations_processed);*/ } annotation_chrs_seen->insert(chrom); if(store_local) (*store_local)[chrom] = local_vals; } if(intervals) bwDestroyOverlappingIntervals(intervals); bwClose(fp); bwCleanup(); return 0; } template static void output_missing_annotations(const annotation_map_t* annotations, const chr2bool* annotations_seen, FILE* ofp, Op op = csum) { //check if we're doing means output doubles, otherwise output longs T val = 0; int (*printPtr) (char* buf, const char*, long, long, T, double*, long) = &print_shared; int (*outputFunc)(void* fh, char* buf, uint32_t buf_len) = &my_write; if(SUMS_ONLY) printPtr = &print_shared_sums_only; char* buf = new char[1024]; for(auto const& kv : *annotations) { if(annotations_seen->find(kv.first) == annotations_seen->end()) { const auto &ants = kv.second; for(unsigned long z = 0; z < kv.second.size(); z++) { const auto p = ants[z]; int buf_len = (*printPtr)(buf, kv.first.c_str(), p[0], p[1], val, nullptr, 
z); (*outputFunc)(ofp, buf, buf_len); } } } } template void output_all_coverage_ordered_by_BED(const strlist* chrm_order, annotation_map_t* annotations, FILE* afp, BGZF* afpz, FILE* uafp,BGZF* uafpz, Op op = csum, str2dblist* store_local = nullptr) { int (*outputFunc)(void* fh, char* buf, uint32_t buf_len) = &my_write; void* out_fh = afp; void* uout_fh = uafp; if(afpz) { outputFunc = &my_gzwrite; out_fh = afpz; } if(uafpz) uout_fh = uafpz; double* local_vals = nullptr; for(auto const c : *chrm_order) { if(!c) continue; std::vector& annotations_for_chr = (*annotations)[c]; int (*printPtr) (char*, const char*, long, long, T, double*, long) = &print_shared; if(SUMS_ONLY) printPtr = &print_shared_sums_only; if(store_local) { local_vals = (*store_local)[c]; printPtr = &print_local; if(SUMS_ONLY) printPtr = &print_local_sums_only; } //check if we're doing means output doubles, otherwise output longs char* buf = new char[OUT_BUFF_SZ]; char* bufptr = buf; int buf_len = 0; int buf_written = 0; //unique char* ubuf = nullptr; if(uafp) ubuf = new char[OUT_BUFF_SZ]; char* ubufptr = ubuf; int ubuf_len = 0; int ubuf_written = 0; int num_lines_per_buf = round(OUT_BUFF_SZ / COORD_STR_LEN) - 3; for(long z = 0; z < annotations_for_chr.size(); z++) { const auto &item = annotations_for_chr[z]; const T start = item[0], end = item[1]; T val = item[2]; if(buf_written >= num_lines_per_buf) { bufptr[0]='\0'; (*outputFunc)(out_fh, buf, buf_len); bufptr = buf; buf_written = 0; buf_len = 0; } int written = (*printPtr)(bufptr, c, (long) start, (long) end, val, local_vals, z); bufptr += written; buf_len += written; buf_written++; //do uniques if asked to if(uafp) { val = item[3]; if(ubuf_written >= num_lines_per_buf) { ubufptr[0]='\0'; (*outputFunc)(uout_fh, ubuf, ubuf_len); ubufptr = ubuf; ubuf_written = 0; ubuf_len = 0; } written = (*printPtr)(ubufptr, c, (long) start, (long) end, val, local_vals, z); ubufptr += written; ubuf_len += written; ubuf_written++; } } char last_line[1024]; 
if(buf_written > 0) { bufptr[0]='\0'; (*outputFunc)(out_fh, buf, buf_len); } if(ubuf_written > 0) { ubufptr[0]='\0'; (*outputFunc)(uout_fh, ubuf, ubuf_len); } } } //multiple sources for this kind of tokenization, one which was useful was: //https://yunmingzhang.wordpress.com/2015/07/14/how-to-read-file-line-by-lien-and-split-a-string-in-c/ void split_string(std::string line, char delim, strvec* tokens) { tokens->clear(); std::stringstream ss(line); std::string token; while(getline(ss, token, delim)) { tokens->push_back(token); } } template void process_bigwig_worker(strvec& bwfns, annotation_map_t* annotations, strlist* chrm_order, int keep_order_idx, Op op) { //want to just get the filename itself, no path str2dblist store_local; for(auto bwfn_ : bwfns) { strvec tokens; const char* bwfn = bwfn_.c_str(); fprintf(stderr, "about to process %s\n", bwfn); std::string str(bwfn_); split_string(str, '/', &tokens); char afn[1024]; FILE* afp = nullptr; sprintf(afn, "%s.err", tokens.back().c_str()); FILE* errfp = fopen(afn, "w"); sprintf(afn, "%s.all.tsv", tokens.back().c_str()); afp = fopen(afn, "w"); chr2bool annotation_chrs_seen; double annotated_auc = 0.0; int ret = process_bigwig(NULL,bwfn, &annotated_auc, annotations, &annotation_chrs_seen, afp, keep_order_idx, op = op, errfp = errfp, &store_local); if(ret != 0) { fprintf(errfp,"FAILED to process bigwig %s\n", bwfn); if(afp) fclose(afp); fclose(errfp); return; } //if we wanted to keep the chromosome order of the annotation output matching the input BED file if(keep_order_idx == 2) output_all_coverage_ordered_by_BED(chrm_order, annotations, afp, nullptr, nullptr, nullptr, op, &store_local); else output_missing_annotations(annotations, &annotation_chrs_seen, afp, op = op); if(afp) fclose(afp); //fprintf(aucfp, "AUC\t%" PRIu64 "\n", annotated_auc); fprintf(stdout, "AUC_ANNOTATED_BASES\t%.3f\t%s\n", annotated_auc, bwfn); //fprintf(errfp, "AUC\t%.3f\n", annotated_auc); fprintf(errfp,"SUCCESS processing bigwig %s\n", bwfn); 
fclose(errfp); } //hold off on final deletion, for performance /*for( auto mitr : store_local) delete mitr.second;*/ } Op get_operation(const char* opstr) { if(strcmp(opstr, "mean") == 0) return cmean; if(strcmp(opstr, "min") == 0) return cmin; if(strcmp(opstr, "max") == 0) return cmax; return csum; } typedef hashmap str2str; static const uint64_t frag_lens_mask = 0x00000000FFFFFFFF; static const int FRAG_LEN_BITLEN = 32; template int go_bw(const char* bw_arg, int argc, const char** argv, Op op, htsFile *bam_fh, int nthreads, bool keep_order, bool has_annotation, FILE* afp, BGZF* afpz, annotation_map_t* annotations, chr2bool* annotation_chrs_seen, const char* prefix, bool sum_annotation, strlist* chrm_order, FILE* auc_file, uint64_t num_annotations, annotation_map_t* acmap) { //only calculate AUC across either the BAM or the BigWig, but could be restricting to an annotation as well int err = 0; bool LOAD_BALANCE = false; int slen = strlen(bw_arg); bool is_bw_list_file = strcmp(bw_arg+(slen-3), "txt") == 0; fprintf(stderr,"Processing %s\n",bw_arg); fflush(stderr); //just do all/total AUC if no options are passed in if(argc == 1 || (argc == 2 && has_option(argv, argv+argc, "--auc")) || (argc == 3 && has_option(argv, argv+argc, "--bwbuffer")) || (argc == 4 && has_option(argv, argv+argc, "--bwbuffer") && has_option(argv, argv+argc, "--auc"))) { //should be the same as "all_auc" except support possibility of continuous values //in the BigWig (but not in the BAM, since we control how we count) double total_auc = 0.0; int ret = process_bigwig_for_total_auc(bw_arg, &total_auc); if(ret == 0) fprintf(stdout, "AUC_ALL_BASES\t%.3f\n", total_auc); return ret; } double annotated_total_auc = 0.0; //process bigwig for annotation/auc int keep_order_idx = keep_order?2:-1; //TODO: look into implemention multithreaded mode for single BigWig processing (maybe per chromosome?) 
if(is_bw_list_file) { strvec* files_per_thread[nthreads]; uint64_t bytes_per_thread[nthreads]; for(int i=0; i < nthreads; i++) { files_per_thread[i] = new strvec(); bytes_per_thread[i] = 0; } FILE* bw_list_fp = fopen(bw_arg, "r"); if(unlikely(bw_list_fp == nullptr)) assert(false); char *bwfn = (char *)std::malloc(LINE_BUFFER_LENGTH); size_t length = LINE_BUFFER_LENGTH; ssize_t bytes_read = getline(&bwfn, &length, bw_list_fp); int file_idx = 0; struct stat fstat; mate2len file2size; strvec files; std::vector fsizes; uint64_t total_fsize = 0; uint32_t num_files = 0; while(bytes_read != -1) { char *bp = bwfn; bp[bytes_read-1]='\0'; int thread_i = file_idx++ % nthreads; std::string str(bp); files.push_back(str); if(LOAD_BALANCE) { stat(bp, &fstat); fsizes.push_back(fstat.st_size); total_fsize += fstat.st_size; } num_files++; bytes_read = getline(&bwfn, &length, bw_list_fp); } //now load balance between threads based on file size uint64_t per_thread_size = total_fsize / nthreads; int max_num_files_per_thread = num_files / nthreads; int thread_i = 0; int num_files_current_thread = 0; for(int i=0; i < num_files; i++) { if((LOAD_BALANCE && bytes_per_thread[thread_i] + fsizes[i] > per_thread_size) || (num_files_current_thread >= max_num_files_per_thread) && thread_i+1 < nthreads) { thread_i++; num_files_current_thread = 0; } if(LOAD_BALANCE) bytes_per_thread[thread_i] += fsizes[i]; files_per_thread[thread_i]->push_back(files[i]); num_files_current_thread++; } std::vector threads; for(int i=0; i < nthreads; i++) { threads.push_back(std::thread(process_bigwig_worker, std::ref(*(files_per_thread[i])), annotations, chrm_order, keep_order_idx, op=op)); } for(auto &t: threads) t.join(); fclose(bw_list_fp); if(afp && afp != stdout) fclose(afp); if(afpz) bgzf_close(afpz); std::free(bwfn); return 0; } //don't have a list of BigWigs, so just process the single one int ret = process_bigwig(chrm_order, bw_arg, &annotated_total_auc, annotations, annotation_chrs_seen, afp, 
keep_order_idx, op, stderr, nullptr, acmap); if(ret != 0) { return ret; } //if we wanted to keep the chromosome order of the annotation output matching the input BED file if(keep_order) output_all_coverage_ordered_by_BED(chrm_order, annotations, afp, afpz, nullptr, nullptr, op); else output_missing_annotations(annotations, annotation_chrs_seen, afp, op = op); if(afp && afp != stdout) fclose(afp); if(ret == 0 && auc_file) fprintf(auc_file, "AUC_ANNOTATED_BASES\t%.3f\n", annotated_total_auc); if(auc_file && auc_file != stdout) fclose(auc_file); return ret; } int sam_index_iterator_wrapper(bam1_t* b, htsFile* bfh, bam_hdr_t* bhdr, hts_itr_t* sam_itr) { return sam_itr_next(bfh, sam_itr, b); } int sam_scan_iterator_wrapper(bam1_t* b, htsFile* bfh, bam_hdr_t* bhdr, hts_itr_t* sam_itr) { return sam_read1(bfh, bhdr, b); } int NUM_CHARS_IN_REGION_STR = 1000; //based on http://www.cplusplus.com/reference/iterator/iterator/ template class BAMIterator : public std::iterator { bam1_t* b; htsFile* bfh; bam_hdr_t* bhdr; hts_idx_t* bidx; hts_itr_t* sam_itr; int (*itrPtr)(bam1_t* b, htsFile* bfh, bam_hdr_t* bhdr, hts_itr_t* sam_itr) = &sam_scan_iterator_wrapper; char* amap; char** amap_ptr; public: BAMIterator(bam1_t* z, htsFile* bam_fh, bam_hdr_t* bam_hdr) :b(z),bfh(bam_fh),bhdr(bam_hdr),bidx(nullptr),sam_itr(nullptr) {} BAMIterator(bam1_t* z, htsFile* bam_fh, bam_hdr_t* bam_hdr, const char* bam_fn, annotation_map_t* annotations, uint32_t annotations_count, strlist* chrm_order) :b(z),bfh(bam_fh),bhdr(bam_hdr),bidx(nullptr),sam_itr(nullptr) { if(annotations_count == 0) return; //given a set of regions, check to see if we have an accompaning BAM index file (.bai) //check if BAI exists, if not proceed with linear scan through BAM iterator if((bidx = sam_index_load(bfh, bam_fn)) == 0) { fprintf(stderr,"no index for BAM/CRAM file, doing full scan\n"); return; } uint32_t amap_count = annotations_count; amap = new char[amap_count*NUM_CHARS_IN_REGION_STR]; amap_ptr = new 
char*[amap_count]; uint64_t k = 0; for(auto const c : *chrm_order) { std::vector& annotations_for_chr = (*annotations)[c]; for(long z = 0; z < annotations_for_chr.size(); z++) { const auto &item = annotations_for_chr[z]; const T start = item[0], end = item[1]; //keep the auto null char amap_ptr[k++] = amap; amap += (sprintf(amap, "%s:%lu-%lu", c, (long) start, (long) end)+1); } } assert(k==amap_count); sam_itr = sam_itr_regarray(bidx, bhdr, amap_ptr, amap_count); if(!sam_itr) { fprintf(stderr,"failed to create SAM file iterator, exiting\n"); //TODO: change this to a return exit(-1); } //delete amap; //delete amap_ptr; itrPtr = &sam_index_iterator_wrapper; } BAMIterator(const BAMIterator& bitr) : b(bitr.b),bfh(bitr.bfh),bhdr(bitr.bhdr),bidx(bitr.bidx),sam_itr(bitr.sam_itr),itrPtr(bitr.itrPtr) {} BAMIterator& operator++() { int r = itrPtr(b, bfh, bhdr, sam_itr); if(r < 0) b = nullptr; return *this; } BAMIterator operator++(int) {BAMIterator temp(*this); operator++(); return temp;} bool operator==(const BAMIterator& rhs) const {return b==rhs.b;} bool operator!=(const BAMIterator& rhs) const {return b!=rhs.b;} bam1_t* operator*() {return b;} //~BAMIterator() { if(sam_itr) { hts_itr_destroy(sam_itr); delete amap; delete amap_ptr;} } ~BAMIterator() { if(sam_itr) { hts_itr_destroy(sam_itr);} } };
/**
 * Finish the index built alongside a BGZF-compressed coverage file, attach the
 * tabix-style metadata block (BED configuration followed by the names of the
 * chromosomes present in the index) and save it in CSI format.
 *
 * @param fname         name of the BGZF file the index belongs to
 * @param ifname        name to save the index under
 * @param bfh           open BGZF handle; its current offset marks the end of the data
 * @param cidx          index to finish and save; stays owned by the caller
 * @param chrms_in_cidx slot 0 holds the number of chromosomes in the index; slot
 *                      i (1-based) is > 0 when header target i-1 is in the index
 * @param hdr           header that supplies the target names
 * @return 0 on success, -1 when finishing or saving the index fails
 */
int finalize_tabix_index(const char* fname, const char* ifname, BGZF* bfh, hts_idx_t* cidx, int* chrms_in_cidx, const bam_hdr_t *hdr) {
    //this function assumes that the chromosome (chrm) order indexes have been tracked while adding
    //intervals to the BGZip file we're finalizing the index for here
    //but now we need to create the final array of chrm order indexes mapped to chrm names
    //while tracking the total length of all chrm names catted together
    //this will serve as part of the index metadata
    //
    //1) track order by which chromosomes are added to index
    //2) create index2chromosome name
    //3) track total chromosome name length (concatenated overall names including the separating '\0's)
    if(hts_idx_finish(cidx, bgzf_tell(bfh)) != 0) {
        fprintf(stderr,"Error finishing BGZF index for base coverage, skipping\n");
        return -1;
    }

    //largely lifted from: https://github.com/samtools/htslib/blob/4162046b28a7d9d8a104ce28086d9467cc05c212/tbx.c#L216
    //only the BED configuration is needed, so no tbx_t is allocated
    const tbx_conf_t conf = tbx_conf_bed;

    //first slot is the number of chromosomes present in the index
    const int num_chrms = chrms_in_cidx[0];

    //collect the names of the chromosomes present in the index, in header order
    std::vector<const char*> names;
    uint32_t all_cnames_len = 0;
    for(int i = 1; i < hdr->n_targets+1; i++) {
        if(chrms_in_cidx[i] > 0) {
            all_cnames_len += strlen(hdr->target_name[i-1]) + 1; //+1 for '\0'
            names.push_back(hdr->target_name[i-1]);
        }
    }
    assert((int) names.size() == num_chrms);
    (void) num_chrms; //only read by the assert above

    //metadata header: the configuration words followed by the total name length
    uint32_t x[7];
    memcpy(x, &conf, 24);
    x[6] = all_cnames_len;
    if (ed_is_big())
        for (int i = 0; i < 7; ++i)
            x[i] = ed_swap_4(x[i]);

    //metadata body: header words, then every name including its terminating '\0'
    std::vector<uint8_t> meta(28 + all_cnames_len);
    memcpy(meta.data(), x, 28);
    size_t l = 28;
    for(const char* nm : names) {
        const size_t xi = strlen(nm) + 1;
        memcpy(meta.data() + l, nm, xi);
        l += xi;
    }

    //is_copy=1: the index takes its own copy, so the local buffer is released
    //here and the index never has to free memory it did not allocate
    hts_idx_set_meta(cidx, l, meta.data(), 1);

    if(hts_idx_save_as(cidx, fname, ifname, HTS_FMT_CSI) != 0) {
        fprintf(stderr,"Error saving BGZF index for base coverage, skipping\n");
        return -1;
    }
    return 0;
}
template int go_bam(const char* bam_arg, int argc, const char** argv, Op op, htsFile *bam_fh, int nthreads, bool keep_order, bool has_annotation, FILE* afp, BGZF* afpz, annotation_map_t* annotations, chr2bool* annotation_chrs_seen, const char* prefix, bool sum_annotation, strlist* chrm_order, FILE* auc_file, uint64_t num_annotations, uint32_t window_size = 0) { //only calculate AUC across either the BAM or the BigWig, but could be restricting to an annotation as well uint64_t all_auc = 0; uint64_t unique_auc = 0; uint64_t annotated_auc = 0; uint64_t unique_annotated_auc = 0; std::cerr << "Processing BAM: \"" << bam_arg << "\"" << std::endl; bam_hdr_t *hdr = sam_hdr_read(bam_fh); if(!hdr) { 
std::cerr << "ERROR: Could not read header for " << bam_arg << ": " << std::strerror(errno) << std::endl; return -1; } bool add_chr_prefix = has_option(argv, argv+argc, "--add-chr-prefix"); char** target_names = nullptr; if(add_chr_prefix) { target_names = new char*[hdr->n_targets]; for(int32_t i = 0; i < hdr->n_targets; i++) { target_names[i] = new char[4096]; strcpy(target_names[i], "chr"); } const char* cprefix = *(get_option(argv, argv+argc, "--add-chr-prefix")); if(!cprefix || (strcmp(cprefix,"human") != 0 && strcmp(cprefix,"mouse") != 0)) { fprintf(stderr, "bad (or no) argument passed to --add-chr-prefix, should be either \"human\" or \"mouse\"\n"); return -1; } int num_chrs_need_prefix = 22; if(strcmp(cprefix,"mouse") == 0) num_chrs_need_prefix = 19; for(int32_t i = 0; i < hdr->n_targets; i++) { long int chr_id = strtol(hdr->target_name[i], nullptr, 10); if((chr_id >= 1 && chr_id <= num_chrs_need_prefix) || strcmp(hdr->target_name[i], "X") == 0 || strcmp(hdr->target_name[i], "Y") == 0 || strcmp(hdr->target_name[i], "M") == 0) { strcat(target_names[i], hdr->target_name[i]); } else if(strcmp(hdr->target_name[i], "MT") == 0) strcpy(target_names[i], "chrM"); else strcpy(target_names[i], hdr->target_name[i]); } hdr->target_name = target_names; } if(has_option(argv, argv+argc, "--head")) { print_header(hdr); } hts_set_threads(bam_fh, nthreads); //setup list of callbacks for the process_cigar() //this is so we only have to walk the cigar for each alignment ~1 time callback_list process_cigar_callbacks; args_list process_cigar_output_args; args_list maplen_outlist; uint64_t total_number_bases_processed = 0; maplen_outlist.push_back(&total_number_bases_processed); bool count_bases = has_option(argv, argv+argc, "--num-bases"); if(count_bases) { process_cigar_callbacks.push_back(maplength); process_cigar_output_args.push_back(&maplen_outlist); } bool print_qual = has_option(argv, argv+argc, "--print-qual"); bool include_sc = false; FILE* softclip_file = nullptr; 
uint64_t total_softclip_count = 0; uint64_t total_number_sequence_bases_processed = 0; if(has_option(argv, argv+argc, "--include-softclip")) { include_sc = true; char afn[1024]; sprintf(afn, "%s.softclip.tsv", prefix); softclip_file = fopen(afn, "w"); } const bool only_polya_sc = has_option(argv, argv+argc, "--only-polya"); const bool include_n_mms = has_option(argv, argv+argc, "--include-n"); //might change double_count later based on other options bool double_count = has_option(argv, argv+argc, "--double-count"); const bool report_end_coord = has_option(argv, argv+argc, "--ends"); if(has_option(argv, argv+argc, "--test-polya")) { SOFTCLIP_POLYA_TOTAL_COUNT_MIN=1; SOFTCLIP_POLYA_RATIO_MIN=0.01; } size_t recs = 0; std::vector mdzbuf; bam1_t *rec = bam_init1(); if(!rec) { std::cerr << "ERROR: Could not initialize BAM object: " << std::strerror(errno) << std::endl; return -1; } kstring_t sambuf{ 0, 0, nullptr }; bool first = true; //largest human chromosome is ~249M bases //long chr_size = 250000000; long chr_size = -1; std::unique_ptr coverages, unique_coverages; bool compute_coverage = false; int bw_unique_min_qual = 0; read2len overlapping_mates; read2len alts_overlapping_mates; read2overlaps* overlap_coords = nullptr; read2cigarops* first_mate_saved_ops = nullptr; bigWigFile_t *bwfp = nullptr; bigWigFile_t *ubwfp = nullptr; //--coverage -> output perbase coverage to STDOUT (compute_coverage=true) //--bigwig -> output perbase coverage to bigwig (compute_coverage=true), // this option overrides --coverage=>coverage will be *only* written to the bigwig // even if --coverage is also passed in //--auc -> output AUC of coverage (compute_coverage=true) //--annotation output annotated regions of coverage (compute_coverage=true) bool auc_opt = has_option(argv, argv+argc, "--auc") || argc == 1; bool coverage_opt = has_option(argv, argv+argc, "--coverage"); bool annotation_opt = has_option(argv, argv+argc, "--annotation"); bool bigwig_opt = has_option(argv, argv+argc, 
"--bigwig"); #ifdef WINDOWS_MINGW if(bigwig_opt) { bigwig_opt = false; fprintf(stderr,"WARNING: writing BigWigs (--bigwig) is not supported on Windows at this time, no BigWig file(s) will be written, but any other options will still be processed.\n"); } #endif bool dont_output_coverage = !(coverage_opt || bigwig_opt); FILE* cov_fh = stdout; bool gzip = has_option(argv, argv+argc, "--gzip"); bool no_coverage_stdout = gzip || has_option(argv, argv+argc, "--no-coverage-stdout"); //gzFile gcov_fh; BGZF* gcov_fh = nullptr; hts_idx_t* cidx = nullptr; bool unique = has_option(argv, argv+argc, "--min-unique-qual"); FILE* uafp = nullptr; BGZF* uafpz = nullptr; if(coverage_opt || auc_opt || annotation_opt || bigwig_opt) { compute_coverage = true; chr_size = get_longest_target_size(hdr); coverages.reset(new uint32_t[chr_size]); if(bigwig_opt) { bwfp = create_bigwig_file(hdr, prefix,"all.bw"); if(!bwfp) return -1; } if(unique) { if(annotation_opt && window_size == 0) { uafp = stdout; if(gzip || has_option(argv, argv+argc, "--no-annotation-stdout")) { char afn[1024]; if(gzip) { sprintf(afn, "%s.unique.tsv.gz", prefix); uafpz = bgzf_open(afn,"w10"); uafp = nullptr; } else { sprintf(afn, "%s.unique.tsv", prefix); uafp = fopen(afn, "w"); } } } if(bigwig_opt) { ubwfp = create_bigwig_file(hdr, prefix, "unique.bw"); if(!ubwfp) return -1; } bw_unique_min_qual = atoi(*(get_option(argv, argv+argc, "--min-unique-qual"))); unique_coverages.reset(new uint32_t[chr_size]); } if(coverage_opt && !bigwig_opt && no_coverage_stdout) { char cov_fn[1024]; if(gzip) { sprintf(cov_fn, "%s.coverage.tsv.gz", prefix); gcov_fh = bgzf_open(cov_fn,"w10"); cov_fh = nullptr; //from https://github.com/samtools/htslib/blob/c9175183c42382f1030503e88ca7e60cb9c08536/sam.c#L923 //and https://github.com/brentp/hts-nim/blob/0eaa867e747d3bc844b5ecb575796e4688b966f5/src/hts/csi.nim#L34 int min_shift = 14; int n_lvls = (TBX_MAX_SHIFT - min_shift + 2) / 3; int fmt = HTS_FMT_CSI; cidx = hts_idx_init(0, fmt, 0, min_shift, 
n_lvls); } else { sprintf(cov_fn, "%s.coverage.tsv", prefix); cov_fh = fopen(cov_fn,"w"); } } } fraglen2count* frag_dist = new fraglen2count(1); mate2len* frag_mates = new mate2len(1); char cov_prefix[50]=""; int32_t ptid = -1; std::unique_ptr starts, ends; bool compute_ends = false; FILE* rsfp = nullptr; FILE* refp = nullptr; if(has_option(argv, argv+argc, "--read-ends")) { compute_ends = true; char refn[1024]; sprintf(refn, "%s.starts.tsv", prefix); rsfp = fopen(refn,"w"); sprintf(refn, "%s.ends.tsv", prefix); refp = fopen(refn,"w"); if(chr_size == -1) chr_size = get_longest_target_size(hdr); starts.reset(new uint32_t[chr_size]); ends.reset(new uint32_t[chr_size]); } bool print_frag_dist = false; FILE* fragdist_file = nullptr; if(has_option(argv, argv+argc, "--frag-dist")) { char afn[1024]; sprintf(afn, "%s.frags.tsv", prefix); fragdist_file = fopen(afn, "w"); print_frag_dist = true; } const bool echo_sam = has_option(argv, argv+argc, "--echo-sam"); std::fstream alts_file; bool compute_alts = false; if(has_option(argv, argv+argc, "--alts")) { char afn[1024]; sprintf(afn, "%s.alts.tsv", prefix); alts_file.open(afn, std::fstream::out); compute_alts = true; overlap_coords = new read2overlaps[1](); first_mate_saved_ops = new read2cigarops[1](); //we don't support correcting overlapping mate pairs for alts //unless coverage is also being computed //this is because we piggyback on the coverage computation //to get the list of overlapping segments between mates in a pair if(!compute_coverage) double_count = true; } FILE* jxs_file = nullptr; FILE* all_jxs_file = nullptr; bool extract_junctions = false; bool extract_all_junctions = false; uint32_t len = 0; args_list junctions; coords jx_coords; str2cstr jx_pairs; str2int jx_counts; if(has_option(argv, argv+argc, "--junctions")) { junctions.push_back(&len); junctions.push_back(&jx_coords); char afn[1024]; sprintf(afn, "%s.jxs.tsv", prefix); jxs_file = fopen(afn, "w"); extract_junctions = true; 
process_cigar_callbacks.push_back(extract_junction); process_cigar_output_args.push_back(&junctions); } if(has_option(argv, argv+argc, "--all-junctions")) { char afn[1024]; sprintf(afn, "%s.all_jxs.tsv", prefix); all_jxs_file = fopen(afn, "w"); extract_all_junctions = true; if(!extract_junctions) { junctions.push_back(&len); junctions.push_back(&jx_coords); process_cigar_callbacks.push_back(extract_junction); process_cigar_output_args.push_back(&junctions); } } const bool require_mdz = has_option(argv, argv+argc, "--require-mdz"); //the number of reads we actually looked at (didn't filter) uint64_t reads_processed = 0; char* cigar_str = new char[10000]; bool long_reads = false; if(has_option(argv, argv+argc, "--long-reads")) { long_reads = true; } int jx_str_sz = 2048; if(long_reads) //enough for the cigar string and ~100 junctions jx_str_sz = 12048; //no filter out by default int filter_in_mask = 0xFFFFFFFF; if(has_option(argv, argv+argc, "--filter-in")) { filter_in_mask = atoi(*(get_option(argv, argv+argc, "--filter-in"))); } //filter out alignments with either BAM_FUNMAP and/or BAM_FSECONDARY flags set by default (260) int filter_out_mask = 260; if(has_option(argv, argv+argc, "--filter-out")) { filter_out_mask = atoi(*(get_option(argv, argv+argc, "--filter-out"))); } bam1_t* rec_ = bam_init1(); uint64_t num_annotations_ = 0; if(dont_output_coverage && !auc_opt) num_annotations_ = num_annotations; int num_cigar_ops = process_cigar_callbacks.size(); //init to 0's int* chrms_in_cidx = new int[hdr->n_targets+1]{}; //TODO: also implement automatic detection of >=80% region coverage of genome //AND automatically turn this on if we're doing windowed regions as windowed regions never use the index bool skip_index = has_option(argv, argv+argc, "--no-index"); int num_annotations_for_index = num_annotations; if(skip_index) num_annotations_for_index = 0; bool no_region = true; if(num_annotations > 0) no_region = false; //default of empty string for read name for alts char* 
qname_for_alts = emptystr; BAMIterator bitr(rec_, bam_fh, hdr, bam_arg, annotations, num_annotations_for_index, chrm_order); BAMIterator end(nullptr, nullptr, nullptr); for(++bitr; bitr != end; ++bitr) { recs++; rec = *bitr; bam1_core_t *c = &rec->core; //read name char* qname = bam_get_qname(rec); //fprintf(stderr, "recs %lu, qname %s\n",recs,qname); //*******Main Quantification Conditional (for ref & alt coverage, frag dist) //filter OUT unmapped and secondary alignments //if((c->flag & BAM_FUNMAP) == 0 && (c->flag & BAM_FSECONDARY) == 0) { //catch case where c-flag is 0 and we've specified an all inclusive filter-in option (default) if(((c->flag & filter_in_mask) != 0 && (c->flag & filter_out_mask) == 0) || (c->flag == 0 && filter_in_mask == 0xFFFFFFFF)) { reads_processed++; //base-0 start coordinate int32_t refpos = rec->core.pos; //size of aligned portion of the read (start to end on the reference) uint32_t maplen = -1; //base-1 end coordinate int32_t end_refpos = -1; //base-0 mate start coordinate int32_t mrefpos = rec->core.mpos; //used for adjusting the fragment lengths int32_t total_intron_len = 0; //ref chrm/contig ID int32_t tid = rec->core.tid; int32_t tlen = rec->core.isize; if(tid != ptid && ptid != -1) chr_size = hdr->target_len[ptid]; if(softclip_file) total_number_sequence_bases_processed += c->l_qseq; //*******Reference coverage tracking if(compute_coverage) { if(tid != ptid) { if(ptid != -1) { overlapping_mates.clear(); sprintf(cov_prefix, "cov\t%d", ptid); if(coverage_opt || bigwig_opt || auc_opt || window_size > 0) { if(no_region) { all_auc += print_array(cov_prefix, hdr->target_name[ptid], ptid, (int32_t*) coverages.get(), chr_size, false, bwfp, cov_fh, dont_output_coverage, no_region, gcov_fh, cidx, chrms_in_cidx, afp, afpz, window_size, op); if(unique) { sprintf(cov_prefix, "ucov\t%d", ptid); unique_auc += print_array(cov_prefix, hdr->target_name[ptid], ptid, (int32_t*) unique_coverages.get(), chr_size, false, ubwfp, cov_fh, 
dont_output_coverage, no_region); } } else { all_auc += print_array(cov_prefix, hdr->target_name[ptid], ptid, coverages.get(), chr_size, false, bwfp, cov_fh, dont_output_coverage, no_region, gcov_fh, cidx, chrms_in_cidx, afp, afpz, window_size, op); if(unique) { sprintf(cov_prefix, "ucov\t%d", ptid); unique_auc += print_array(cov_prefix, hdr->target_name[ptid], ptid, unique_coverages.get(), chr_size, false, ubwfp, cov_fh, dont_output_coverage, no_region); } } } //if we also want to sum coverage across a user supplied file of annotated regions int keep_order_idx = keep_order?2:-1; if(sum_annotation && annotations->find(hdr->target_name[ptid]) != annotations->end()) { sum_annotations(coverages.get(), (*annotations)[hdr->target_name[ptid]], chr_size, hdr->target_name[ptid], afp, &annotated_auc, op, !annotation_opt, keep_order_idx); if(unique) { keep_order_idx = keep_order?3:-1; sum_annotations(unique_coverages.get(), (*annotations)[hdr->target_name[ptid]], chr_size, hdr->target_name[ptid], uafp, &unique_annotated_auc, op, !annotation_opt, keep_order_idx); } if(!keep_order) annotation_chrs_seen->insert(hdr->target_name[ptid]); } } //need to reset the array for the *current* chromosome's size, not the past one reset_array(coverages.get(), hdr->target_len[tid]); if(unique) reset_array(unique_coverages.get(), hdr->target_len[tid]); } end_refpos = calculate_coverage(rec, coverages.get(), unique_coverages.get(), double_count, bw_unique_min_qual, &overlapping_mates, &total_intron_len, overlap_coords, no_region); } //additional counting options which make use of knowing the end coordinate/maplen //however, if we're already running calculate_coverage, we don't need to redo this if(end_refpos == -1 && (report_end_coord || print_frag_dist)) end_refpos = calculate_coverage(rec, nullptr, nullptr, double_count, bw_unique_min_qual, nullptr, &total_intron_len, overlap_coords, no_region); if(report_end_coord) fprintf(stdout, "%s\t%d\n", qname, end_refpos); //*******Fragment length 
distribution (per chromosome) if(print_frag_dist) { //csaw's getPESizes criteria //first, don't count read that's got problems if((c->flag & BAM_FSECONDARY) == 0 && (c->flag & BAM_FSUPPLEMENTARY) == 0 && (c->flag & BAM_FPAIRED) != 0 && (c->flag & BAM_FMUNMAP) == 0 && ((c->flag & BAM_FREAD1) != 0) != ((c->flag & BAM_FREAD2) != 0) && rec->core.tid == rec->core.mtid) { //are we the later mate? if so we calculate the frag length if(frag_mates->find(qname) != frag_mates->end()) { uint64_t both_lens = (*frag_mates)[qname]; int32_t both_intron_lengths = total_intron_len + (both_lens & frag_lens_mask); both_lens = both_lens >> FRAG_LEN_BITLEN; int32_t mreflen = (both_lens & frag_lens_mask); frag_mates->erase(qname); if(((c->flag & BAM_FREVERSE) != 0) != ((c->flag & BAM_FMREVERSE) != 0) && (((c->flag & BAM_FREVERSE) == 0 && refpos < mrefpos + mreflen) || ((c->flag & BAM_FMREVERSE) == 0 && mrefpos < end_refpos))) { if(both_intron_lengths > abs(rec->core.isize)) both_intron_lengths = 0; (*frag_dist)[abs(rec->core.isize)-both_intron_lengths]++; } } else { uint64_t both_lens = end_refpos - refpos; both_lens = both_lens << FRAG_LEN_BITLEN; both_lens |= total_intron_len; (*frag_mates)[qname] = both_lens; } } } //*******Start/end positions (for TSS,TES) //track read starts/ends //if minimum quality is set, then we only track starts/ends for alignments that pass if(compute_ends) { int32_t refpos = rec->core.pos; if(tid != ptid) { if(ptid != -1) { for(uint32_t j = 0; j < chr_size; j++) { if(starts[j] > 0) fprintf(rsfp,"%s\t%d\t%d\n", hdr->target_name[ptid], j+1, starts[j]); if(ends[j] > 0) fprintf(refp,"%s\t%d\t%d\n", hdr->target_name[ptid], j+1, ends[j]); } } reset_array(starts.get(), hdr->target_len[tid]); reset_array(ends.get(), hdr->target_len[tid]); } if(bw_unique_min_qual == 0 || rec->core.qual >= bw_unique_min_qual) { starts[refpos]++; if(end_refpos == -1) end_refpos = refpos + align_length(rec); //offset by 1 ends[end_refpos-1]++; } } //echo back the sam record if(echo_sam) 
{ int ret = sam_format1(hdr, rec, &sambuf); if(ret < 0) { std::cerr << "Could not format SAM record: " << std::strerror(errno) << std::endl; return -1; } kstring_out(std::cout, &sambuf); std::cout << '\n'; } //*******Alternate base coverages, soft clipping output //track alt. base coverages if(compute_alts) { //TODO: need to test the mate pair detection here char* qname_for_alts_ = qname_for_alts; bool track_qname = false; bool first_mate_w_overlap = false; bool second_mate = false; std::vector* mate_vec = nullptr; MateInfo* mate_info = nullptr; std::vector overlapping_coords; std::vector saved_ops; bool potential_mate_found = false; bool save_ops = false; const std::string tn(qname); if(!double_count) { if(tid != ptid) { first_mate_saved_ops->clear(); overlap_coords->clear(); } if(end_refpos == -1) end_refpos = bam_endpos(rec); bool possible_overlap = rec->core.tid == rec->core.mtid && end_refpos > mrefpos; auto saved_ops_it = first_mate_saved_ops->find(qname); bool read_not_already_seen = saved_ops_it == first_mate_saved_ops->end(); first_mate_w_overlap = read_not_already_seen && possible_overlap && refpos <= mrefpos; if(first_mate_w_overlap) save_ops = true; int32_t refpos_to_hash = mrefpos; //needs to handle the case where refpos == mrefpos second_mate = possible_overlap && refpos >= mrefpos && !read_not_already_seen; if(second_mate) { //see if we have any cigar operations to emit from our first mate saved_ops = saved_ops_it->second; refpos_to_hash = refpos; } auto mit = overlap_coords->find(qname); potential_mate_found = mit != overlap_coords->end(); if(potential_mate_found) overlapping_coords = mit->second; } if(first) { if(print_qual) { uint8_t *qual = bam_get_qual(rec); if(qual[0] == 255) { std::cerr << "WARNING: --print-qual specified but quality strings don't seem to be present" << std::endl; print_qual = false; } } first = false; } const uint8_t *mdz = bam_aux_get(rec, "MD"); if(!mdz) { if(require_mdz) { std::stringstream ss; ss << "No MD:Z extra field 
for aligned read \"" << hdr->target_name[c->tid] << "\""; throw std::runtime_error(ss.str()); } track_qname = output_from_cigar(rec, alts_file, &total_softclip_count, include_sc, only_polya_sc, qname, &overlapping_coords, &saved_ops, save_ops); // just use CIGAR } else { mdzbuf.clear(); parse_mdz(mdz + 1, mdzbuf); // skip type character at beginning track_qname = output_from_cigar_mdz( rec, mdzbuf, alts_file, &total_softclip_count, qname, &overlapping_coords, &saved_ops, save_ops = save_ops, print_qual, include_sc, only_polya_sc, include_n_mms); // use CIGAR and MD:Z } if(save_ops && first_mate_saved_ops) first_mate_saved_ops->emplace(tn, saved_ops); //cleanup if(second_mate && saved_ops.size() > 0) first_mate_saved_ops->erase(qname); if(second_mate && potential_mate_found) overlap_coords->erase(qname); } ptid = tid; //*******Run various cigar-related functions for 1 pass through the cigar string if(num_cigar_ops > 0) process_cigar(rec->core.n_cigar, bam_get_cigar(rec), &cigar_str, &process_cigar_callbacks, &process_cigar_output_args); //*******Extract jx co-occurrences (not all junctions though) if(extract_junctions || extract_all_junctions) { bool unique_aln = ((bw_unique_min_qual == 0 && rec->core.qual >= 10) || (bw_unique_min_qual > 0 && rec->core.qual >= bw_unique_min_qual)); bool paired = (c->flag & BAM_FPAIRED) != 0; const uint8_t *s = bam_aux_get(rec, "XS"); char real_strand = (c->flag & 16)!=0?'1':'0'; if(s) { real_strand = bam_aux2A(s); } int32_t tlen_orig = tlen; int32_t mtid = c->mtid; if(tid != mtid) tlen = mtid > tid ? 
1000 : -1000; //output coords* cl = (coords*) junctions[1]; int sz = cl->size(); char* jx_str = nullptr; char* all_jx_str = nullptr; //first create jx string for any of the normal conditions //1) if we're extracting all junctions just print individual jx's if(extract_all_junctions && sz >= 2) { all_jx_str = new char[jx_str_sz]; //coordinates are 1-based chromosome int ix = 0; for(int jx = 0; jx < sz; jx++) { uint32_t coord = refpos + (*cl)[jx]; if(jx % 2 == 0) ix = sprintf(all_jx_str, "%s\t%s\t%d\t", qname, hdr->target_name[tid], coord+1); else { //ix += sprintf(all_jx_str+ix, "%d\t%d\t%s\t%d\n", coord, (c->flag & 16) != 0, cigar_str, unique_aln); ix += sprintf(all_jx_str+ix, "%d\t%c\t%s\t%d\n", coord, real_strand, cigar_str, unique_aln); fprintf(all_jxs_file, "%s", all_jx_str); } } delete all_jx_str; } //2) if we're also extracting co-occurring jx's keep a joint string if(extract_junctions && (sz >= 4 || (paired && sz >= 2))) { jx_str = new char[jx_str_sz]; //coordinates are 1-based chromosome //int ix = sprintf(jx_str, "%s\t%d\t%d\t%d\t%s\t", hdr->target_name[tid], refpos+1, (c->flag & 16) != 0, tlen_orig, cigar_str); int ix = sprintf(jx_str, "%s\t%d\t%c\t%d\t%s\t", hdr->target_name[tid], refpos+1, real_strand, tlen_orig, cigar_str); for(int jx = 0; jx < sz; jx++) { uint32_t coord = refpos + (*cl)[jx]; if(jx % 2 == 0) { if(jx >=2 ) ix += sprintf(jx_str+ix, ","); ix += sprintf(jx_str+ix, "%d-", coord+1); } else ix += sprintf(jx_str+ix, "%d", coord); } } //not paired, only care if we have 2 or more introns if(!paired && extract_junctions && sz >= 4) fprintf(jxs_file, "%s\t%d\n", jx_str, unique_aln); //now determine if we're 1st/2nd/single mate if(paired && extract_junctions) { //first mate if(tlen > 0 && sz >= 2) { jx_pairs[qname] = jx_str; jx_counts[qname] = sz; } //2nd mate else if(tlen < 0) { bool prev_mate_printed = false; //1st mate with > 0 introns int mate_sz = 0; if(jx_pairs.find(qname) != jx_pairs.end()) { char* pre_jx_str = jx_pairs[qname]; mate_sz = 
jx_counts[qname]; //there must be at least 2 introns between the mates if(mate_sz >= 4 || (mate_sz >= 2 && sz >= 2)) { fprintf(jxs_file, "%s\t%d", pre_jx_str, unique_aln); prev_mate_printed = true; } if(pre_jx_str) delete pre_jx_str; jx_pairs.erase(qname); jx_counts.erase(qname); } //2nd mate with > 0 introns if(sz >= 4 || (mate_sz >= 2 && sz >= 2)) { if(prev_mate_printed) fprintf(jxs_file, "\t"); fprintf(jxs_file, "%s\t%d", jx_str, unique_aln); prev_mate_printed = true; } if(prev_mate_printed) fprintf(jxs_file,"\n"); if(jx_str) delete(jx_str); } } if(jx_str && !(extract_junctions && paired)) delete(jx_str); //reset for next alignment *((uint32_t*) junctions[0]) = 0; cl->clear(); } } } if(ptid != -1) chr_size = hdr->target_len[ptid]; delete(cigar_str); if(jxs_file) fclose(jxs_file); if(all_jxs_file) fclose(all_jxs_file); if(print_frag_dist) { if(ptid != -1) print_frag_distribution(frag_dist, fragdist_file); fclose(fragdist_file); } if(compute_coverage) { if(ptid != -1) { sprintf(cov_prefix, "cov\t%d", ptid); if(coverage_opt || bigwig_opt || auc_opt || window_size > 0) { if(no_region) all_auc += print_array(cov_prefix, hdr->target_name[ptid], ptid, (int32_t*) coverages.get(), chr_size, false, bwfp, cov_fh, dont_output_coverage, no_region, gcov_fh, cidx, chrms_in_cidx, afp, afpz, window_size, op); else all_auc += print_array(cov_prefix, hdr->target_name[ptid], ptid, coverages.get(), chr_size, false, bwfp, cov_fh, dont_output_coverage, no_region, gcov_fh, cidx, chrms_in_cidx, afp, afpz, window_size, op); if(coverage_opt || window_size > 0) { //now print out all contigs/chrms in header which had 0 coverage, only do this for the "all reads" coverage char* last_interval_line = new char[1024]; int line_len = 0; int ret = 0; int (*printPtr) (void* fh, char* buf, uint32_t buf_len) = &my_write; void* wcfh = afp; if(!afp) { printPtr = &my_gzwrite; wcfh = afpz; } uint32_t wi = 0; char* val = new char[10]; sprintf(val,"%d",0); if(op == cmean) sprintf(val,"%.2f",0.00); uint32_t 
wend = 0; for(int ci=0; ci < hdr->n_targets; ci++) { uint32_t chr_len = hdr->target_len[ci]; char* chr_name = hdr->target_name[ci]; if(chrms_in_cidx[ci+1] == 0) { chrms_in_cidx[ci+1] = ++chrms_in_cidx[0]; if(window_size > 0) { for(wi=0; wi < chr_len; wi+=window_size) { wend = wi+window_size; if(wend > chr_len) wend = chr_len; line_len = sprintf(last_interval_line, "%s\t%u\t%u\t%s\n", chr_name, wi, wend, val); (*printPtr)(wcfh, last_interval_line, line_len); } } if(coverage_opt) { line_len = sprintf(last_interval_line, "%s\t0\t%u\t0\n", chr_name, chr_len); if(gcov_fh) { ret = bgzf_write(gcov_fh, last_interval_line, line_len); if(cidx) { if(hts_idx_push(cidx, chrms_in_cidx[ci+1]-1, 0, hdr->target_len[ci], bgzf_tell(gcov_fh), 1) < 0) { fprintf(stderr,"error writing line in index at coordinates: %s:%u-%u, tid: %d idx tid: %d exiting\n", hdr->target_name[ci], 0, hdr->target_len[ci], ci, chrms_in_cidx[ci+1]-1); return -1; } } } else ret = fwrite(last_interval_line, sizeof(char), line_len, cov_fh); } } } } if(unique) { sprintf(cov_prefix, "ucov\t%d", ptid); if(no_region) unique_auc += print_array(cov_prefix, hdr->target_name[ptid], ptid, (int32_t*) unique_coverages.get(), chr_size, false, ubwfp, cov_fh, dont_output_coverage, no_region); else unique_auc += print_array(cov_prefix, hdr->target_name[ptid], ptid, unique_coverages.get(), chr_size, false, ubwfp, cov_fh, dont_output_coverage, no_region); } } if(sum_annotation && annotations->find(hdr->target_name[ptid]) != annotations->end()) { int keep_order_idx = keep_order?2:-1; sum_annotations(coverages.get(), (*annotations)[hdr->target_name[ptid]], chr_size, hdr->target_name[ptid], afp, &annotated_auc, op, false, keep_order_idx); if(unique) { keep_order_idx = keep_order?3:-1; sum_annotations(unique_coverages.get(), (*annotations)[hdr->target_name[ptid]], chr_size, hdr->target_name[ptid], uafp, &unique_annotated_auc, op, false, keep_order_idx); } if(!keep_order) annotation_chrs_seen->insert(hdr->target_name[ptid]); } //if we 
wanted to keep the chromosome order of the annotation output matching the input BED file //assert(afpz == uafpz || (afpz != nullptr && uafpz != nullptr)); if(keep_order) output_all_coverage_ordered_by_BED(chrm_order, annotations, afp, afpz, uafp, uafpz); } if(sum_annotation && auc_file) { fprintf(auc_file, "ALL_READS_ANNOTATED_BASES\t%" PRIu64 "\n", annotated_auc); if(unique) fprintf(auc_file, "UNIQUE_READS_ANNOTATED_BASES\t%" PRIu64 "\n", unique_annotated_auc); } if(sum_annotation && !keep_order) { output_missing_annotations(annotations, annotation_chrs_seen, afp); if(unique) output_missing_annotations(annotations, annotation_chrs_seen, uafp); } if(auc_file) { fprintf(auc_file, "ALL_READS_ALL_BASES\t%" PRIu64 "\n", all_auc); if(unique) fprintf(auc_file, "UNIQUE_READS_ALL_BASES\t%" PRIu64 "\n", unique_auc); } } if(compute_ends) { if(ptid != -1) { for(uint32_t j = 0; j < chr_size; j++) { if(starts[j] > 0) fprintf(rsfp,"%s\t%d\t%d\n", hdr->target_name[ptid], j+1, starts[j]); if(ends[j] > 0) fprintf(refp,"%s\t%d\t%d\n", hdr->target_name[ptid], j+1, ends[j]); } } } if(bwfp) { bwClose(bwfp); if(!ubwfp) bwCleanup(); } if(ubwfp) { bwClose(ubwfp); bwCleanup(); } //for writing out an index for BGZipped coverage BED files char temp_afn[1024]; int min_shift = 14; tbx_conf_t tconf = tbx_conf_bed; if(cov_fh && cov_fh != stdout) fclose(cov_fh); if(gzip && gcov_fh) { sprintf(temp_afn, "%s.coverage.tsv.gz", prefix); char temp_afni[1024]; sprintf(temp_afni, "%s.coverage.tsv.gz.csi", prefix); int check = finalize_tabix_index(temp_afn, temp_afni, gcov_fh, cidx, chrms_in_cidx, hdr); bgzf_close(gcov_fh); } if(gzip && afpz) { sprintf(temp_afn, "%s.annotation.tsv.gz", prefix); if(window_size > 0) sprintf(temp_afn, "%s.window.tsv.gz", prefix); bgzf_close(afpz); if(tbx_index_build(temp_afn, min_shift, &tconf) != 0) { fprintf(stderr,"Error dumping BGZF index for annotation coverage (all alignments), skipping\n"); } } if(gzip && uafpz) { sprintf(temp_afn, "%s.unique.tsv.gz", prefix); 
bgzf_close(uafpz); if(tbx_index_build(temp_afn, min_shift, &tconf) != 0) { fprintf(stderr,"Error dumping BGZF index for annotation coverage (unique alignments), skipping\n"); } } if(rsfp) fclose(rsfp); if(refp) fclose(refp); if(compute_alts && alts_file) alts_file.close(); if(auc_file && auc_file != stdout) fclose(auc_file); if(afp && afp != stdout) fclose(afp); if(uafp) fclose(uafp); fprintf(stderr,"Read %" PRIu64 " records\n",recs); if(count_bases) { fprintf(stdout,"%" PRIu64 " records passed filters\n",reads_processed); fprintf(stdout,"%" PRIu64 " bases in alignments which passed filters\n",*((uint64_t*) maplen_outlist[0])); //fprintf(stdout,"%lu bases in alignments which passed filters\n",total_number_bases_processed); } if(softclip_file) { fprintf(softclip_file,"%" PRIu64 " bases softclipped\n",total_softclip_count); fprintf(softclip_file,"%" PRIu64 " total number of processed sequence bases\n",total_number_sequence_bases_processed); fclose(softclip_file); } fprintf(stderr,"# of overlapping pairs: %" PRIu64 "\n", num_overlapping_pairs); return 0; } template int go(const char* fname_arg, int argc, const char** argv, Op op, htsFile *bam_fh, bool is_bam) { //number of bam decompression threads //0 == 1 thread for the whole program,fname_arg//decompression shares a single core with processing //This can also indicate the number of parallel threads to process a list of BigWigs for //the purpose of summing over a passed in annotation int nthreads = 0; if(has_option(argv, argv+argc, "--threads")) { const char** nthreads_ = get_option(argv, argv+argc, "--threads"); nthreads = atoi(*nthreads_); } bool keep_order = !has_option(argv, argv+argc, "--keep-order"); strlist chrm_order; FILE* afp = nullptr; annotation_map_t annotations; annotation_map_t annotations_collapsed; bool sum_annotation = false; chr2bool annotation_chrs_seen; //setup hashmap to store BED file of *non-overlapping* annotated intervals to sum coverage across //maps chromosome to vector of uint arrays 
storing start/end of annotated intervals int err = 0; bool has_annotation = has_option(argv, argv+argc, "--annotation"); bool gzip = has_option(argv, argv+argc, "--gzip"); bool no_annotation_stdout = has_option(argv, argv+argc, "--no-annotation-stdout"); const char* prefix = fname_arg; uint64_t num_annotations = 0; if(has_option(argv, argv+argc, "--prefix")) prefix = *(get_option(argv, argv+argc, "--prefix")); BGZF* afpz = nullptr; uint32_t window_size = 0; if(has_annotation) { const char* afile = *(get_option(argv, argv+argc, "--annotation")); if(!afile) { std::cerr << "No argument to --annotation" << std::endl; return -1; } //TODO: parse afile for a window size (e.g. 200) if doing windowed regions char* output_prefix = new char[100]; sprintf(output_prefix, "window"); window_size = strtol(afile, nullptr, 10); if(window_size == 0) { afp = fopen(afile, "r"); if(!afp) { fprintf(stderr, "bad argument to --annotation: either the path \"%s\" doesn't exist or cannot be read, terminating\n", afile); return -1; } if(is_bam) err = read_annotation(afp, &annotations, &chrm_order, keep_order, &num_annotations, nullptr); else err = read_annotation(afp, &annotations, &chrm_order, keep_order, &num_annotations, &annotations_collapsed); if(err != 0) return err; fclose(afp); assert(!annotations.empty()); std::cerr << annotations.size() << " chromosomes for annotated regions read\n"; std::cerr << annotations_collapsed.size() << " chromosomes for annotated regions read, collapsed\n"; long num_sizes=0; for(auto ita: annotations_collapsed) { //std::cerr << ita.second.size() << " " << ita.first << "\n"; num_sizes+=ita.second.size(); } fprintf(stderr,"total number of annotations in collapsed: %u\n",num_sizes); sprintf(output_prefix, "annotation"); sum_annotation = true; } else fprintf(stderr, "computing coverage windows of length %u\n", window_size); afp = stdout; if(gzip || no_annotation_stdout) { char afn[1024]; if(gzip) { sprintf(afn, "%s.%s.tsv.gz", prefix, output_prefix); afpz = 
bgzf_open(afn,"w10"); afp = nullptr; } else { sprintf(afn, "%s.%s.tsv", prefix, output_prefix); afp = fopen(afn, "w"); } } } //if no args are passed in other than a file (BAM or BW) //then just compute the auc FILE* auc_file = nullptr; //if we 1) have no params OR 2) we have no params but --bwbuffer OR 3) --auc with/wo any other options if(argc == 1 || has_option(argv, argv+argc, "--auc") || (argc == 3 && has_option(argv, argv+argc, "--bwbuffer"))) { auc_file = stdout; if(has_option(argv, argv+argc, "--no-auc-stdout")) { char afn[1024]; sprintf(afn, "%s.auc.tsv", prefix); auc_file = fopen(afn, "w"); } } assert(err == 0); if(is_bam) return go_bam(fname_arg, argc, argv, op, bam_fh, nthreads, keep_order, has_annotation, afp, afpz, &annotations, &annotation_chrs_seen, prefix, sum_annotation, &chrm_order, auc_file, num_annotations, window_size = window_size); else return go_bw(fname_arg, argc, argv, op, bam_fh, nthreads, keep_order, has_annotation, afp, afpz, &annotations, &annotation_chrs_seen, prefix, sum_annotation, &chrm_order, auc_file, num_annotations, &annotations_collapsed); } int get_file_format_extension(const char* fname) { int slen = strlen(fname); if(strcmp("bam", &(fname[slen-3])) == 0 || strcmp("sam", &(fname[slen-3])) == 0) return BAM_FORMAT; if(strcmp("cram", &(fname[slen-4])) == 0) return CRAM_FORMAT; if(strcmp("bw", &(fname[slen-2])) == 0 || strcmp("BW", &(fname[slen-2])) == 0 || strcmp("bigwig", &(fname[slen-6])) == 0 || strcmp("bigWig", &(fname[slen-6])) == 0 || strcmp("BigWig", &(fname[slen-6])) == 0) return BW_FORMAT; return UNKNOWN_FORMAT; } int main(int argc, const char** argv) { argv++; argc--; // skip binary name if(argc == 0 || has_option(argv, argv + argc, "--help") || has_option(argv, argv + argc, "--usage")) { print_version(); std::cout << std::endl << USAGE << std::endl; return 0; } if(has_option(argv, argv + argc, "--version")) { print_version(); return 0; } if(has_option(argv, argv+argc, "--bwbuffer")) { const char* opstr = 
*(get_option(argv, argv+argc, "--bwbuffer")); BW_READ_BUFFER = atol(opstr); } if(has_option(argv, argv+argc, "--sums-only")) { SUMS_ONLY = true; } if(has_option(argv, argv+argc, "--distance")) { const char* opstr = *(get_option(argv, argv+argc, "--distance")); COLLAPSED_ANNOTATION_MAX_DISTANCE = atol(opstr); } if(has_option(argv, argv+argc, "--unsorted")) { SORTED_ANNOTATIONS = false; } const char *fname_arg = get_positional_n(argv, argv+argc, 0); if(!fname_arg) { std::cerr << "ERROR: Could not find positional arg" << std::endl; return -1; } int format_code = get_file_format_extension(fname_arg); if(format_code == UNKNOWN_FORMAT) { std::cerr << "ERROR: Could determine format of " << fname_arg << " exiting" << std::endl; return -1; } bool is_bam = (format_code == BAM_FORMAT || format_code == CRAM_FORMAT); htsFile* bam_fh = nullptr; if(is_bam) { bam_fh = sam_open(fname_arg, "r"); if(!bam_fh) { std::cerr << "ERROR: Could not open " << fname_arg << ": " << std::strerror(errno) << std::endl; return -1; } const htsFormat* format = hts_get_format(bam_fh); const char* hts_format_ex = hts_format_file_extension(format); if(CRAM_FORMAT) { //from https://github.com/samtools/samtools/pull/299/files //and https://github.com/brentp/mosdepth/blob/389ca702c5709654a5d4c1608073d26315ce3e35/mosdepth.nim#L867 //turn off decoding of unused base qualities and other unused fields for just base coverage //but only if --alts isn't passed in hts_set_opt(bam_fh, CRAM_OPT_DECODE_MD, 0); hts_set_opt(bam_fh, CRAM_OPT_REQUIRED_FIELDS, SAM_QNAME | SAM_FLAG | SAM_RNAME | SAM_POS | SAM_MAPQ | SAM_CIGAR | SAM_RNEXT | SAM_PNEXT); if(has_option(argv, argv+argc, "--alts")) { //we want everything decoded hts_set_opt(bam_fh, CRAM_OPT_DECODE_MD, 1); hts_set_opt(bam_fh, CRAM_OPT_REQUIRED_FIELDS, SAM_QNAME | SAM_FLAG | SAM_RNAME | SAM_POS | SAM_MAPQ | SAM_CIGAR | SAM_MAPQ | SAM_RNEXT | SAM_PNEXT | SAM_TLEN | SAM_QUAL | SAM_AUX | SAM_RGAUX | SAM_SEQ); } if(has_option(argv, argv+argc, "--fasta")) { const char* 
fasta_file = *(get_option(argv, argv+argc, "--fasta")); int ret = hts_set_fai_filename(bam_fh, fasta_file); if(ret != 0) { std::cerr << "ERROR: Could not use the passed in FASTA index " << fasta_file << " exiting" << std::endl; return -1; } } } } Op op = csum; if(has_option(argv, argv+argc, "--op")) { const char* opstr = *(get_option(argv, argv+argc, "--op")); op = get_operation(opstr); } std::ios::sync_with_stdio(false); if(!is_bam || op == cmean) return go(fname_arg, argc, argv, op, bam_fh, is_bam); else return go(fname_arg, argc, argv, op, bam_fh, is_bam); } megadepth-1.2.0/megadepth_detailed.png000066400000000000000000003263531420302544700177640ustar00rootroot00000000000000PNG  IHDRy_IDATxݽ,Mz0X6(K!ikiKkhw52&H7mXCuΒmgs'/H|22##3}3">x}~I63+f`$pux,H8( k^9t0 X;@eY. P߳ӱJٞ3*K{[XxA=TMm=>I8bq:XmI8^ʡg۱_%H+pl)˦9yBv^:w+VXo^탄 k^9twTskױb=N8xEAcԯG4@uX_c >V(P_$ |д@y6 CAm|zdT!U(?AQjC4[)%xVK0i&ǻUc'[H8\ scpp*.H89p@iOa@Js DN 3VhQ#`rS:5 WB!;U'+)<+9TP'b)$y[kI8b l=XZt_ -\ LYIG]>wk{ /Aܷ ~-:BA݀ex `$PR߷p%0gYm1㼮XU+`rD "Vޠo-+?PJꛀ&ဩ<+=O8 F,W@_+D"Vހ$I84 LYi rfP̼L muJZzYoKps&Rb$0ԳXGncߗrP3tk& qj; muJZZ5^Tw> $0gm?MT x7c i S\}n 1w;'`=IGok ]'t$I8%0ԳRͶjq#߇N1֯pP{I ` 7I/,L*-w_*{̛m)oI8%PYiȶ &٦|2BRso Ǣ{.$_f=X(`0}mz`6zvn{ykW;x=DŽz^c<:_pp-SzVʝy㛒,qjgSVRsOa} o m<%fXOĭg=d0)4\/#֥]b[(᠌Tj>cl =_0TNއ:}1KL<oy:wOE񂡓bpPy .1& ܽUbi7%;"kʦvPB}:`< CV'vq>FBڸ$&3>h0ăY$j$jOʶ9w'>ghkWuT8f|ozPG$@UF)fwL)Vɮ9YfR #9h9ě'ιBfsZr Da3uSp5D@t*H8.' 
%-T݀yyxܬB crۿEF4KϾ=Tkm*u%IlN+u2P{Y3٠vPGwl$@UF)ʹƮ)x )oR=8.*p8@T*,١@XKnPd:3WO8X~ 44u0R eY)OHG=Pmigyc9[C4mW_,Co=Y2h@]3d@{oz<{)ӫVfrS֔ aԬ\ 9UFI0ΠVzxJac-GǯpLpi%Lo?ӧFPI)3KlRfmX'C q9}>i!5*%)S냭 kT^sH8X*;dĹk$@v)#PlScg sHέ;Vw7#佄'xNs˔36۝ c]!$ԓdz@At Ub>t1-p0kqN1ԱS ʗJj@vc_9}9$Jnˑ%ڮUpIRZӑy)1AR>)$R<\20/܁.ѡ up},9Ȼ(Wԗ #c=CH8#'4t7[IϡNS܄gTXoN_XV&L}lS%` rЎ 0өj-uMM8x yg:‹ts.pz}s,8T덏P9Wg2 v藡lt>[֛Iv$wX4'S38\Zu}~ńFzpP^ےϔ RR^(on: Lq$ƽ ϩ%޲P>=<}J>Cs/>^*-t|792 W>U?z6øm[>߹ Oґ>H rIlj>tRVHek,z%pY5Yn9SsHtsc:gK(Ld2ۃ1A un&x@IɵcrIH>97a㫂)si4s:I5,w@W7qz%] ~gːD(mCJh]̹ `9剩S̶PS96l{6z_]M>?Z خ;!/0&RRa))5fatKYc ׮ERb%uXttd6g[JN{>;H8v!-?v0&7eNjMH8L8QgK$l.sLI)mB(w*u߯p;\befs{_Y]^絈_Mm/-X<"!}_emqڸģn'p ^IëIJs?PiSR}ug fj={ݮPp;;NOc%>qƛIȜ =R1mp)Jtcfb;WV~gJ/gp1@).٩M8Itȝr|uŧ0~u ܄̲9n)3ԔsRoˊ.pdorgI:*=O}NyeKmryIt||7*_C0L3cL9ϕ02vA5ޡg}-*Kl2;$)L1.4 ;4)NYN%ފ~_N~$7VQrc35Rr5V澝{Aq}^ sBP*)AϠ1s"`6q̽TT8_8~YN3F=7Tc̄e_l. z|5gM!_velVLt ".穯~% ӹ Pz+-w4vu$y2ЃsO8X: `N[{kJǽ *TsdX8'iRS 2w|U8/}_V<9Iq?Vj/Ka.P6tJNJ% T0B@~c]u,%5焃Rmd@\y6=O(ާ&J̊TR `s̹) dñl24Ycْ%jj {/*3gJ90絗 pP/5j7PYG%@t@!3NNsp0)שut0$e?b2sEya<}086yJ[sM>f:x R/Je/p3~d4滂mWO8xMv)~wNaRJmImyC6#nh,Tr3\2su_} J@\Ǯا\bMrfg0eB9֫/z=" +9u`6fp1o'PcyT^M%tp5ZL& چ$= ,#)ԉ3\bMԁtJg`ۄʗ^}1XO-ic4ޘZUއc3ZXN! srJOaփEj4TI8x*\]rkw_-I\ $ɼ|, wbyd [u9tWxݶMB Pg+uq"{K8( H4K.O}z1ksUM`n% C^.k,2%R1]MZ}oBDz>}ɧ6mpPמEm7ґD,lj'vl*ۭAϭĚRpqx>փ}~P:͚mmĮ3A `JJs۶]j,6nι]{fyn%' d~w 9ͩ/*އg~\k_g1 ?wM>z1iZ˦%`*tUn&vYs)[1K)ik{L88i{ZƵVAovH8ȫ#C4%S*f)7ڇB[| miu]랾๩ip6km ӄFpmeǚhSWA̭kK! |FNvs.S[nT $˲'9fGKY)=+q|byjT}RFALj[yB[,sa{[!p[A=%0K.cz}>fm Lubq3junpf!95 5` "4fBGn≯<5bޢ4;Ǎd m:ӺOVX5儃J\ksO8H11L+T`>?gJI^p/"#ӧӹ|>p?ު* /8 e삄 4 9K,@0e%b ePw=cQ mnYs[abmks;v8Ĕ맚pX=pR:2}:13[W䚲O ngt1b' K*H8g)z^x/Z{VA bB^*c_k1cQ | _/`[s%L;16g[.´WA%`VR:2}c:5evk&vũۭ,1&U/ٞnK H8s.wF*} 4@c~?g~ns\g1JmWO8x9s?E ^:ύ3=S *E.H8K>slJv!-aK P#>ͺ`f[}k&L-[:CtsםS\uyb,~#D[4Cpgy#9_NxO6(9]ovkY- _WI8}nj/vJI8XWڅqj΢<q)ׅ:3^[ }_NJ):@S{Kӷv1.m&V_~$ &xpulЎNKsrf63c~K8g Xm93}8p3 :ާ^p;&TH8STE0Hߎg̀Ց1xR3 5+.  
/f|Gԓa hXW#C\9W|\r|ʍ&%}cBT-ppٚS3֌*C,6*ܧű9|i%X.y ϕ=xE0l>mȘclj'pKKmܺ:11}R̬=R0`k5I8p0ıDŽc5my6Ms"%.+16l*ufAes)@_O|ːӞev] 1`39H8Akb:11Řp5R;stKq|>C,/Ibb"\Vi ׽7}|sX-6c:y~K8gZm`ܷdm{<ϧx> |mԛku%H8 LMDP~[O~[\Sj.4FAv_g5@9& y9Ӓ N8TtJ֣>ZhwT_K*sN0f=5 $'ș4<+M&Li fc?l3e Z>󼙓Pj6ZK$-\[$-cڂ9$Ը+;/ض.ѱpR:2ΜRY۵rl_̠Kl!UXb+&v<sV^gԞafd;p{0n* ]AgoilBb&h9B9DS(-p0/SL8x#{JEᅲkc&c?׾{eSکׄ!ʝ3+mәL-1XR;`h:vNiki*N !?-Potaf@AٮqNcۡ)5zl Os}l]C|II{Ph* S Ҙ77+%xfj۹D p eh%1%۞̡f,'If[Ulw[y}N9n} %P*_Ëw<qԟzi[Jm%fe]s)’*t&R2:%f9HI;x ՛U!w`!gӧ1;ӫmrKSgz qL6۩Dm7o~XL5-'HZiwcrrˌrmWrO#է[3.Ăǧ qcc+G9M%u%@.C|jFܷsRWanKNt=R)5g=g)5KC9*l3ڗs;OJ =rq/i=+ pP2 }]qD)kȤ6vXL5`YK[>a;w.2sZ/5qd[*\/)}9LOK8ϱ)I^npauJ Ng;TG-g$51ƞ5ŗP׷;%q : L O Y2Av(͉ L;^kJ$Ϥ&: 5^Œ1*H8=ϫ7m[({L 0TC橄 c%]QR+U6^fӜeWbɲR]܆2 x,K;1x u'1 >U>oe}[>PnP~ybu9W}'DsgJf$LYiuòsmmf B 2儃}~3P$m}\G~[=Ss(c{bpP2Qegso/+:@;]>C5cG)g+uI#%:5;%2_hYSؤBd{ BWHmϹxf0pyӻBߓPrǫny]鸗Œ186 ܞ= xl#@=dA}ekDL9Q*w{'{FjoOgS7R]%9eY,;)~)pYE$%`UmYDn%@殱%ޜnm&ԟ6i2n3O^ G8vN)L'd5L7V>5t%|g9n)Ɉ9Uvϐ u`%B(?= S2o´+ }>zlSrc=8NIu7& (GxkUӛG%Կ=Ys3w±5ߦlo}]r*i% ~uY+^B)>,]R^˛ې׬ BYin[j> XURν6 >䴇 2A)M/÷0!g>ͨ?1 ӻ.f}re.U˖vMQr ʶϨ1޴WܹDN';u7$(t5,w}ߨ9h.z} d8 SM6ȹ6N#CP2 $LYi>3u`i1SMpyp0/sI88=Nqmߔ-J'U|M= w)'$ {:Yg>I80aJ!7X>@:5$\kƟx|5vaT{TƶsJ88g#مdk=8 <,99goĮT9 wۦf95"`^pp8,'P38걓HJ&,4w{*T0Y7r[5޴*ԧ\s)”ai*סZAKV"J N\BL%j!VAf4z.IR{M}>{xm^EsO Au' \YJeJ`F=RREd k7=U;ҁ }@92PpP9 7e*{Uslp 5{-92 >{(878RcƋ@1p0~fT-훚e# |p0/sM8>lxǬp!ofă]6xă!vc. %_8ə *ޏƛ^H1@3R"(7Exjp#:`IKȝ:@kBg4g0z]U'p.7r^2P9{rx-Tmv}UK8( !*_a9KH8=G#\#e b;@\:k}ՈkGoz5T|D0\[7dAUS)Cv9:=޴ p1[)"DG5a?,9 n-'TW39n/cr ʥnN@y/#C}zwnZm=]i |'Qg3~y}n'#m)ԟ[O2n[O=ASZRx re* 05_ac&;$@` .l2:I8xO!} [bͩH8&ҟ;c|>vкIoҳ_.xSXrƛ^.6|Bm['; y ƛKH^.w=ls 9i{.B|{w: c0prS:f{pExʩ3iyM{{vʸ#MЭ%J7 *C^m_ }~δz0GY5Mc_wu>7d:bk:RH8`Ǜ,PȔ$p$0gSe|Ndxtwo:9 |0]0P'` : M͋6 -w^,s8pW2xSӿ^9ukg7-n@_Q37ܕfBӷK$Օojf40qMv*| l6p>8d>^7ʡ`rƛ|qBbH8 &$ ~@4"O&@ p? 'D hDM ~@4"O&@ p? 'D hDM ~@4"O&@ p? 'D hDM ~@4"O&@ p? 'D hDM ~@4"O&@ p? 'D hDM ~@4"O&@ p? 
'D hDpiZ9"I`7g?qO˿y }S럗_$@ ܯ&`ϻvowȕ$@ ܡb/ .%"43 i |$@ ܙoNDpfffYO`k8$@&"OtLg?qMR@?ς#{ G˿L?9zEm~&gfR8 pw>|~o~Y3wxdya.|c9g?xf81-qq9F\&|87|Ω7?B]~nk.%:SuڷCY߷f5kRݸ?:.S@'p?K),}t \]<O/}O|2 gH`|k&㾿|?n'5> ~9׾>KNߏd+>oCnչu| gf8w~VԃuwkYI rܚrߖOyQw9u;'@ ܇v6'H{K{nܿ vvWaօ W~N+ppiӌ.WtgMTsu K (pp(Ĉ4x4Is b.գp'poSRO?}4{+zoogJ~ӔLJOA>vgS8nAw߿fe}~ƥi?V1=rf"\*ܾM09DsOǦyop;3g0TkՅ}~|/Rgg8%4I0p)Y{ܺK(x5SIwf&1TǧU.  C7[;.~y>7 ٥.h}&F| jϛ ̈́N:;qݵDOw\ ݠp{N}4B.;g˙>\Wk}JZTo5o1KdD},2~ pfݵLåDr ם})P~IgnP1k~sC-XxĵDntfTTonkyO ܿfߛV?~uC2ﶖz9D}>R>[>ϼ wvk1ACBsT $yt7[>~'- ba[BXѻ\gm3;C)"O>Ԟ֛۟t Snc'$DKlϐpC pA%,~;0~mfsJģa'0kֆS9BpںKz\>[;w,{vNrȩ!"O>tf8x}ՍKOA־n7vk ݙ㝸¥}Sv"D?ԅגk[w&)\Ĕgtg9hψ$p\;C&(D}hݟ3%o'4 ZN8HkX2 sq0MK;#Ct.>sx@J~_~^\Du.p?}KtsK8h~ln;|mRykeq&rń9y }<ԉ=-&4KfM8|ܽc};"ON"o2!IXh}clBV Δ}s(QgV$vغz q\}J"EL#k;B:^"O~t=|l̿H$y3,EquH:f99u n Hwȷϻv%n$3|Jw:ڔYΕq,w;8|+9 pp#!9j˦^d%dԅk9\)s],Ou K7i1z7)@[S tפnޚ4#B))$|m(q+Ap>3`4 Y : u97ǹC%\ ulCΕ:uv1=[uZćMzs>Έԝy1p?; `vOmoo\ ~ 2ߚ-!";ånw6seDC|Kx6‡>$f4?uo};Ǧ=ť9;G ppHQif9jxz޼u\M-Y;%@ ܡ֛~~Mo]on77ٻ??{7|+ vnٚ>ۅ}8[Sy3MHvznp{L}W'QRq|7|a;p.!F]8fK*(}?Js xo?'= @ϛ~^d, %@ ܯnPvx@ǠąKop\Rᴏ%O?.ؙ>,uqnqy.Շy;lq~f\Cg63TASr#gg@4čru}ěL}e"O5o\_x#v{DٝOwx{or[w7x\Ae OoJ.[qlxLw)(tc7q| gYbss,!ߝ9t.;8t.%&_ߟgF8,u!TDD{K|~Pq!P_>$=YX`zu!%͹5{W9W~/iozvaˍ#~@97oV/T?&X6>{><5ks ߇E8ۍ$_u{CPpR pP?7/oi~vzfk_M]I48ޯ?w;[{j6p /ծo7V?~WsJۏD fk 5M}pK H8 6}v dOHq ߑpp\DM`R>𫷒?fJp8;A_>գcoaP I8DA|`~鄃vbĜ8;3Y7?;MRH4ǡI i>|Bwh>ܵ-#x b3TXZ8u:lB\-O56w:W|%P˚D_f 8ښg}x׿ٕٿ ϛS??-tKNI\0чq6l1c%z@?'Qv= '?I:h?ycT}uV(񖼄orA;I^PnKpMȝ_~H/-PmG[#a泚pHs{S9g1{L}3j|+pmic~cg8|F{{?߷ǥct8~Kn5):syso ݄Ӿ6u[y:ߎWSo{y߷t=߱m׽e{'PF;t1$ MBI8& H8nzSAjeb?î٧k&]2>&K\J|dďڞ{Ky Ğ疰n!{oN3n\ۧ&P}짺=F^/~.Օnݼzn|N~2f'ܲ&zzZu\^uڵm]g h'4gg9hlcP7͛$A>~};qɉ8۫ghLg6w1:%*Fxv^8[;q~[g>_>/8-qO ` I8hV>tjumsÛߎmȷc9&`^n]kc.n1l#YOKn!q+o3•dVBΥk8W.\g?us{ߍq?׿y)pKM7 gf?%Km?K38tO&(g\N{ii[ߞyWf8$)7W)0{K88}+. 
OKt*?89;Chwn7p}Nod?;;7KDz)A94 t @gg5t>ut޾ߙB /Th~wny]g]1Kk;esO{@_Y s͖Bfր3?&#X~$4tvLdf"`,A3nՅy<6Mŭ|5 0)U/cA;|!6A+xeZL@`癙3wyɄn{C'|qNys7e>%\fKSmsAKǶ=[kXs;ouGd8Ժ?k8d>wi8- ڔMbbxZ#\xծr[G,b >L (TsX?k֞\*NyPKP˗"P!mf]~!E(_N.X2LKbwDT_J m#VaNh j TFyî'L`8(Qmomr᠐:U a]l ]1-\kQg%&9AN@Je6j%؍A$wD n?{ei<t*Gջ'gϿ>cA9 /e`© "?DHNLpxk=ߤ!uh8 ֦6zA}-zGK˔:ߖfؙpM^w 2sb8pbo]xlc<X78cMS4\erpMiSi`1lhxlÁPp{1r)s}Z!۾?%ÁNn݌D_@н(C$k3%pOT 尥"g8h7t}f;2,h׹kl{<?dwl10xN2'(A~}~$ÁiПZ1Aj |UBks~f llS.`?9{{\ŀ@IC6PRLmFCppɿ/_jup`Lb*V9:g'jwoMDaQOyw;Si35&AQ <;\3\R;va8W6C5p }3Fjo?e^A|A&U~^cpzqm-Q q\-,9dL  r(8@n?ZgoܵqjG~W%ֽz$7?\?]itR7} {bs5u M<^l}暓 4w[ʶmcw+q ׍׽O9f9tK7ks\Īb1JÁ[1_U Wiy9>0_}F k،s/.@I:61խ/Yѵk2ȵbB bhu.OK%=`@+؎ka `ôMyk(eX&?<B#y !6"C^wDܔ`,>s$s1v#s8h-}mI D'Y+ 950I즤Znd9esyV5s55}L^-` p|_j#*`z$lD| vVIkzHR+klgۻevijg#_qXλseSⰾ#xxf1ˆFP[N?Whb]XŌ [9n 1˶sͽ5Ǔ6w}SGG.O[յ+\لX-Krj8gq/mK}o6b#/b >Z ͚5;R 95Á۽ҷ'eǒX8͜noup E;W8ϣRc2Q$1%[zj;ELnV'MR+XkI4NR&QlҙU)>O`,<;`#EM"h2Eʶ fN~x7}t`EٺOzW9et"v"dtn$DK:&7pٶy:V,5svGֱXѻ϶Я~.]F0K5+v6˿c}e9yp0?ؚ>EfGrch 3g6kBn]g4܌ZQ0u^E}6V`$OZa7u_ "%F۝߱?W oی7,lV=rk_ʂv5odh| :Y1ݬ6މBj ɱu㯹N`JLAoKj}yIj 3eJ?≋G|!m9 Cp[:6qA* .ҧR6 pJi?kX3YwU3&?s- i"^F2lR9ݎq9ӎ8nE\k.ts{ݘvG;m_~ AjdDo2kDR!g|Wێ)p|-S`׈ ?U~ز(oTFĢh5K\CSc̅d A"t5WGRʬ f!ed }Z㳩-lp=فKdݵorkzؔ둾K}>AIj $xl* B$l`B5-R]E9' 4#4g6}"Mm~71ovg)2JY L(Kb{,Vtn1LX ~z`dBzYDLF% ,v-Q glhp@PRAr*4VMypxc8'[̓REԃ`8+$W`r[i;^4^; mT*VcL߶LsS!W-(Ͳa %UʘX8L|`~Z3]L5Jd#SAo/8dzMZ~T"W }epxc8'[EL6 {]NT=&Z>#CĠL2BnSlhȘLe)Dumcl +,cUϑ9Q,Qm88uO|m(3XuiUP΀p@goxc8m12}'tw; [ }۾f|Q__Z6Ŧc(ϰڌQO`ȉs.D^9fi]bg':5)tvɆ|s룹m!1xÁ ,.Ie6xx2뻋7Dd`8'ۥU*1`+\+7\V+p Y So1,i6B,E?:l @znQAdpPWa 8~f_.^c|x)n Upu>~~^㟽&"`^& m 4u*mmo+>A1H,,MnZ dq F@эOQIF]LpbΘ^͝k3/S2,'f$xq6\Q"y(WZFp6WRF?9Z,W5ԈA[=Lj p,Z%@uY!yƖ;7,ps-c@npК!`__|)@DA 'zQiep Kj3ARPZԂ֓T 1G hof -[!cF&-SHn'6 %P]aϊk"B61 8wAMƋP2/-n8(ݟ3n@ 'ym]hϓ~49!8ҶwD[b`QF͉5o~#R|' od8h!o0g6uV~#w}}9Y5 AѶf0l>csm5׶ 2:#㓹wb8P$` <#5촶hgq*r[ ۈ,Vt,,V^Sg3IJBx.|Q[a[B([ܳ 6kD.A鑆]v6LpK936ֱ'f7ٹ}m63a8DXEr!OWZĵD:/uhcCn`YSA ^PԢCe'xawze 7<[Yb;W,z#G+{_pugI5Kf,ⰋZWCKP1ok5V4;L - ܿb1k z}&> T_F]eΌ/f <ڋb8\?u,}ۘfxIF 
P;]ߊ+̷IM!9v76c73 T1Y!?مIE_C`(+p_f7`OM] k)fH6X e8HT?r5qOb>wqYj좤B˽ޏglPxTҋˌȷW_T0P{>"l:@VpLhKדҤ[a3] Q ѺRp T2l-%*juXvGlEoI}^ظDK$ Y(@P}5SL. {}jpM)"yț:PMW97׆6D R7 ǔ!옜_ϐ܋Ucq4{A1>S29d"r<_> jZffȍǙ#s6i-ylZS6?zj3%TǪܡ7]J$unSAugMّZQ}y̓n炝3ծ6 ]`Pӗ={&?9x}|k?)Q'pukDg}ZYDqg6?ޜ/Ad҈Dx3$85@krB  {]fh*;?uw|?_ψ @Nl  <cBT>\b-cS4ȽĀ+72D?`/{7{ψ@?~~EI<ޜ߯MCJ*=r0`sejעuuҧ_o]c8PzDjp|}wZӛg:dq^čm8#a1.ԗF`R}z}iuꚕ"6~Χ/<XC,'9x5CZ+4}[ˡD_+'&k^7`^kM]7as/1t}.U0cו{\\Tĸz~{ʞbݗؚ1豍bֵUs_wk]ލ]֞SlWE֩K+ԕ!P>\e>RR}_݇xПCGJiȸcc#w|~L{œK%ſھk{BO㩴'ebpMQ9W'6kŋuz6l|kbǩqtum3w5JcJzwl zg{D<שsߛg7HOk#qѼ׉ )Nqs~oǙ2245m6Ա5 {v>EMŚ/zKsQ˩C5bvG3gX\P" xOel8ϐԺ֘|z߷ vb>~x$6׎>W-¯|LMc!"n'}-/ߖqk(ƷU8J9N{]Xd^+ * k#^kڐxm^- fo#BclV;pegJ)11\b;f|D0"zLDB{߭-w/K"ʀ wHgwaw bv]) 6jldџnƙOJ;\Ir9K>q5Eej>Oެ]{XX!^5U;ދVn547ayq۵%s_f.B(aқ7k^);Vxwi5;(DncdDX1bPLHJqrz^7&vku;cGwX[&g>/"FSӫò"~M/IG1SD9gt_Wt{AL@CX,(Fs@m6G5g=JΟZZ#p^./.t?^0)y$`n!ݾ+p+į_?uʴ1 pŵfmI1 }&qʼ2/.o;ϫs!fjX/aډbmD3't1"41@T+ZY17&AQ/vj%6#p݇v6ֹtVBuLMmYh;`@2wQ%H^fWsN`w/SjKrm"b*glL3,kcUόe|݌nr*t54*)Dh[!;Okژ_U1ͬC66)ҲnRsCT2|.`.8]-|ukobș 3T$ 9῰ض1ԗȗHCSVAmd HԶƱ8idOkZf,ezp`?#u,=g쮟'sb圐mET;%'' B){@--Y /_)BM|@>u(Q3N5cbۙRF \eD(`^&%9̮j[\2$L8658ATkn=j25Z۟0}x]Ƌ}58ӋyO|7L9bZ,)24}Q 4ܓsj>˜qd?Ͽ>{Hݲ7?>e yA9'&0oM[^^Z8 Gnbmpr<8uKYj ֽ1[ؚ`BmvFRvnJ 7.ˁΨ`v/Vtm0,k p)G *ޜd\dֱ?(u3G$[*u|0,6`O?}OFCf.Á';E8nwO9lh8Z[5`k)=Nu KK{^cȵS2KIKMD4l\Rߦ᠔ϛA*CTح`Wqnh8m_%̢"Zf"{#zۻ2l/.op溌Ja4|zmTF ǹφqQRpvNEl.;P!<>)wCuO@ӗܜW2J$\j8yagM5L7i82mbb8ȕm=@eh+ĔHSbr#Ӂy+po~~ })`Ms佹Cg@sbWM!&IsdZJ3))MTjxnGyL9:SЯB}|I$-1zA-TBDFTJ?UleNÁKEtΕ 9XÁ^ץ0h?!ôizsnj9 Ȯa]/]N=30TgĻ0"BiC0O_\nvS$sķ$C80s Dzg~.#H|TBKHNnz_zGoJx!3P$)u.QAP"A QÁ6iTd|y515={ v'wQ@c0|e~f刭`}e/~l(@>b&m`ϑa*? 
p |e8Ѷx2K} AJ/.O5|%%jkKl29 A}"V !h -Œ,vWi"+vAD}nCJy#?v7v 6>1Й .Xx[;'1A~lHg)Z LFr>y$0')$$ٔ94bG{`8p8{ρq(1x> d9{ϵN Fh6(vIiljF, wF0-0]iɯ&;ǻşBzCn]ks\Vf.D BZcUQi5pPJSq_Cxβtc5Uver%dAxptkG5hKQD2o)C˼Y9Uwo)y <"j^DR-Gwߋ^gW&$@ C)%ۻe?9h8Do2c[:az~ٹnn`E"A9̺k3C5b,뗘;1RԯԺ*E~x'Μ+v΋`8r Y16 bq)9 Zi9$Ϝ7_\d.F׹X< 14"!ŵӛ(K؆@-=n[v硫Rj|IvNݜ?Ҍ V%kEЫ7kXq֊ne\(Įĝҫط* t>pX) IDATcfZ|v]c51X{MhJt=#ÁMՂY+l騰QEĭ<D8];vI؎47n{CSvHJ&1L+縔"epI-eth}٢pi8w"zH!xudfA4\@=]@2GsR"z8+CIp` >*aBF˥+Dt4@RB ]HcB-Kgp'ɕJhE ]}bicf+wsQAeYH/65uTc8ݙ]cj)ս?Á??%"-EbRok {-oXJ{i8pʡ{Kf lyE^u⩔6i#Nq{]gϘ,v%)G쐥 7V<"[QɵvJ!dbܕxT`%i-{0K|LwycAinbQL'hl A}ek)P#_!E!:YF-(D2[p i8p?`vsScu{mb*&6t+eO/9{~ak!nmCS%1 q&{/gS S``S_sտa~Z?zΒ|ֶS{]Q͚})_Ki NúOۗ(<0DۈikLƟڙsvA_2G}ɓREʫ ˇ<)6>>3;?&9'#[])en=a(ƽpcL`M_>4NL哦e̡5ä XIh. b_ke8f6xqZ*?޿sAuRZz_N.f0m0>ukdakf|}x31LjO h{̑)x@=VΖ@;=|m[Cn׻9>tv;_{"D̝b8YB FPd9llv`SI6XAek.bے"eJKee'l AT[?!2c]71&d,~\ `c,Gf'.a!'z0'}+Yr:3 1iӂ}|~e;VH%!c5Hl sĽBN/hDAPMo )cBMƀbL7d>e-QziNMZhQ`ӭs)$58z9'[ Y֐k{p2$Pʺ`S܏vrfs6q Cژ0t˷w)OJiUؚr1Y+uΧ JDUw`յt^`mgv`sM=:rkL _{bh[Dﹲ42s3eG{ U&a&2Ƅ9 nnkD-"WLIQ2dK]yap`$'iͳ@M7D d05V1<aWQ_ߝMRj8^{қ bE``w|]M.ٹ^s}w %a&3@t:H)Uփ$Q] kfX^]ـ <S cT Vw;οRI 7?> 2]n@_[+.Ajnă+{%c1*Z9N@ :Mdr߉<bkoOx\Eo~K4jLA UaūwO&L96J>`_[Ra_ o~|jJ%f\d8CHk @@-ƋXi8BRٌt_Ę1p`ni}kJ61DA 'iGEei@jߘ`MU;>vtw?S/ ָj-]`b vnRt8j8(!bTeY@gˆW .~n9yȼIL3& 0;D174L-A!&AyŘFs3C"p UZe&@SFxg4p?W&dkƐ5_8@ޤ n=׌ύH:J(cB$ƽvt}S26+.#Q_@ '0; R0;#~n;k0p* /7or|9*Vqy`ӗݜ/}|"蹐!"?D) D^uGouMgX]sm"²)y_+8D7Ϯ 0ÁEhB9 @ SDע?r~@읱t\cd,Sfg׳CG7f8{lυc3`8~!B?H[08ȕ4JĄ?}9@d& }pU"cm r!Hkñ!2`8p;T"-ʾeH^T\&6Ҭϝw_@h8^֘sme|mNOBeΘ׎5?ȟk{E#W=%;Cw/R'\ xz5&9oiBI $LOgXI+#*7#_ͿY~}acZOn\}rsۏ;ϯ'ﮤdb[Kq:S]nKk @=z08 京"Xdej(_ǔy^ b6AH,L9ѣqIus2c׾9ک5oL rJV[&#u^elz]Cɼ" y\;7_c5G's\\Ԯ%8aNp` #ѹa\h>{L ˘@E(X\bߖ5bw9<orr+"^7b{p(n}1MT2v_ŧ&kCt#;G11& ]TsY \9BB ]vC "o8P ڶB~7NR;XE,F%)욌?\O5ѯO_ZsCẋYU&.X+dy $vDŽNQ}ĸRf !MzZ5#Gm;sqaz 3KR挘Id´3!ѧc3|R:w8Np`QZG2=ơ\kiu2udtH̋"']zkiQk @)6ց+)Q}Ls/);S*a2a$>>%`{XÁs+܊hkr; ;48Eeuc}oۑkтtw{' nsHQpc$H7]2ݟ3O_Au2FX߆H2 61C]'k>87-8NE SӠ?PW"!L[?M /"R1tstP(uk%1ۮ-lVd f7ŎgX=ȍ! 
!RR:> " cw6^d{^p/.rye?ѦbucCFpo12) #Cf׾`x6 F&Hp d׮5 >2 46)G)Dȼl!~eعdlwo3 )^5;}\{?gv~n8O΍10e82$LT{{Ԗ(AP7i`8]:&}"8DiX;vHp.WAԶVP})-\I`^h(r znFȘ r)B/.0F@IPK\ކl3.d*x,8aC>um+6OQu\zEmP 7,փb-\2A)^ Lqʭ81l+*P2>'[w A!=A 7WL]Lv>kJ8[YGA *ր1|l1 Dv"j!9&iѻA ޽(qMAM.Z/ka8p}1)0dDX,b+1S9fAe g*; w:5mȉcopP2 0=뻳߽Ͽ>CMP +*_uyr?\y-$r"1˥\#w2lµs ewkCǕ(Z֪]s&\KߦX"S;GMzt%@ "Pjzk>!? vT_#'ڧ2tPA).7$ Ytnγ658SFട'pzwo??8,61Ώi1Áz߫3,VeFfrΦZAH_`IS%6]GbF&/_o *,u|-Ι>?L~c "pD}i vQRASݭäLCM0ڭiYoK* >J*ԶSk86d-ʹ-Gr~Z 3!k6pp_AVDט9@Jp/.t`?@!YL5F󱯆~O9F(u4`[Y_\5Hr,A,$:]Z1@25Dܰx^ 2ilb(3*6Ɉ1?rƊC#n8\N`x솳oV&w˳o~‘ǧgo e80i+kM_\L "Pk7Ly7%X%3ږ`Jٱ۬9ф -KVH惚6D9 :KAB.h{m!~Ӭ ss!`Й%rOSaΗz>f%p1F9 OiMpP*Ca<,1=b,Y }Kp`U#fwm1XnD^1sIJ0Lig[MFzg-k!s>lfbe7d ]VWk1f9J4p9qwd 4.csu0F| us:@?J#0D59-qxlA|ߛ=Oั"rOWk8 ,]Vε#;Sj+jq\lS}2 X?ek"[R{oK aA&eXq%JBI/ߴv{ͱsۊ6 X(- G:yQιͤ2M؞5Á)'p`82 ~!Ip`kg"qc>;Ff88_. pp )_;sACZ2sEXJlV(Y6ehutsb^Rӆ{e8hh#.R k)ݖ5`ϵŀ!GyM^oZZc'"xJT}jMK*g2;` @/&eNHNoJ#5j\GqIsh82.K :8綟\]+H7mJɬQY [2"<hCv|^ߗsO?ٷw97?\t׹ݛ 7=qt~?uG}ѯ tnGR~V>~]תO8:#Sj΁W8|h+kw,V(N +:Ѿ2ϛ "fEV.sg8hhf N)Ftkmbyel\ D8f >"(Gvww?Z!]GJdbfjz 8Vc8Vµ%6`xkϜ2*2 B@lwkch~oYSIɌ9 l]cXI?okȃ9fex Ao(gfn4ޛ :5gN7E /5k:SkӠϗsKF1EΓv}DpCXRxcj5>]NbxxkARƆNX+K{{A;_> !7p[KW'EB;b8~sׯ0I#fQ_uyfկɰ?%p@$biL ~? =Hߒp켿9(6ZmsعVȵY FZxAL#&Cv %v|%6qKU{ecp'c`80yLᜆ7D6ͪO?}ekg(~d80;@v[Sۍ?d)~ʢ3m)ӄf=k^0ݽȾ/6Tqj`c` XD B8l43pXۊ.Kt_cSb|pj+֟AH/֧ոqRԤO` b(ţ|&NDM0udh3Q}%ۗ)]w~MF ɩD`",xw?2"<*VzGjQq\FT'TӞ/Qa3HmnHY=>d;HIB9u?fK͖2libDl!fi¶u?dHͷ[b

1იn_\~| Vv? 37+gXc11h 9Rs×ǔl҆e;\ߍebבv>OE7_[7_:@ :@Lbxə 'bԾ;+ ں(KV_V@e H&&٘&ڈef#Cm$CD.f0pUvv.Y H[{3]vTvx S -B1 +hK)@zrӥ rm05c"ezpqJ^3`8G\c!-g:b<Cp+0͏O%3_D~1.;h_c8JXoʹt/9#@Kvx` ?z. / sH6  톃?ջ'Psv[Mk`_7[J& ;hץtƕVyv5ܜ/ɾ $\  n2&QEDޏ7?xJTϿ_u 6_ǫ =ƱORyV01DN :t?7c8懋RRm=zr9p 8Uf5|}wd7懋乯=vG΍Cۻ]/jp 8U4zc,-0jջ'߽T[VB x?]+/ { <Se $?]E @=:mz,ڶvm8 u)'UH/$ӃʾTA<3>x2`LKmpPR@x1$kK]~f.a޵'}w߽ ~mp-/ӕO_`~G/L5ε#2( <SenÁm/u8s^B5%_[pdpAI+iu 8U6:@LXwf &1ܗ xLA*nb2ק9  A 'pzħO_ߝ5a[v駯dWSW˹FW޹/꽩[ZH:P:q gfQ=?'gDb/}3Ub]!W5'І޵3(lz~ak}y WCuχ^xPJAvۻܹAy~ @ cu/2zR& yٷwX;$#G ~Q=n85NOp<`8yze5HԂ< Nz"2˹li-]{w'e{9pyJW0Ԏw/Vass!m̽.kY6X^y1CMPlD->MyI]G7a{"d.w9ΉdA]Zy=H̵!lNo3u9/5Z]RǮ6J5Qsl.$K\o:Tk;jY`''6\*¦NoH :AZz ?})pPsMwk>y1l`8Aq9M6_9#EBo5\|''6$'Q=%t$VY..] nNwUdu-;qۋ X=P;,n5_L+gcj8q w!0t :pgXv׻3\׾1IKlju3Ck>swb^'Gߊllkɉ71Rgl-ڥЯcK㫉QKV"~Zhj8PfTI ۆ3k&'!Up k0kr&Ԇ':fc6׽Z3$W3>(A`>8_hun8ǤpPzi)]QƆАtu#š H@T! >ᠼS_/95\Sc7J ݖp\ &PQ=I/[ӼON )wn8P`ѢsI A=n{k:LA\{ ZmI pPc]GF׼yvMÁ5F5;3k`-0@ `:DuJv-EOA"ے%Aߥm2-BzQ{OjsϯVܜߧcR oY Y/kQp0e$y·wl ׀ >~=[`82 `3(9(m_oumεBqJ7jk %pwbDЙ  6F6DlEAjl|lف!vdk502/UN^+׀ >~=[`8< `s)遑`-Fׁysە?j$~HuFEDXwswn80"Vb7ι,YÁɎѵc켏 2C `[&z5@)E@?O_J>6$ޔu4^}_|Ԍ czpi@6"op qsF*Fpe]kQȵmn>_ۺx<اg 0 Q="|#'bz\zQ]G:>~mqBmrC邩1jgt?}iʕP}/\__~_|\zaGV[s#U Ǽxc~=[`80?D *'l`6@tu]KwWZ _>=<8g Ѓq|K%XӁf9J%x׳l68.Ի9^#<8g 6\ DOX=n<8g r r/{~A |=[`88U `W &b@ x-0" ` zppj`6݃@1a@GW/n侞-03 8@ #Ej )~=[`8W0@Gʕ x׳}Dp< $w1fAdq D=䟞 ӤF4Dqq8gsO~z&L 셰p:taMp<@80- svxfgBt6΅" Ti\$䝞 @8':0P"q˦8 L| sX%x3!pಙa5-'a -EOL\v`&΄" `\)'hvz&\&.8 a` /u4<=.a`h =xgBe!l DC}dSOL ݻwA_~a#༗Be l ]bAl Z^ g B0jDnr_73lXfl`wa_| p.z j__DcEen\H2G6d[FM p`L) J.&a@cr˦80f`%:Cm?^ 7I؏l CF|  uZen: 7 >a\)S a`zo.qGyUOgYmϲk8I>I E6g-N|ݑA7xT>qzOO(<ܳ)s a`z*yIQPy.0G08$hj^Տ-u-?Š$J!pdYْM#p`Hӣډ };Pz^|N-q %<ǡ9_wBX'9fq&80f؀ǁXQBu]: uIQfp<(2RF]|Z|SI΋7SDpkm7'b<1ۉb((m:}QrE0 H曊Aky0{x}=o sc|HYy< T&m8Џ]N@G9w!LK!L9`+ca]K.U=gy-DwC$|'g}R <g)oDbzC6pvg-4#%{v;.|Ou^:(8+8P!5k^D^+g{> Vowʱqs~w"y}KE96ۻoewo, }k*ypWk *]9zܪwsHu qy_ZE~ l#Q%i#`g@wr i@hS#l@7;p}e+iSt<FFkoyTYx^g/i!{msĉcd;lnnx?{m~JMR504E)6`2;sU 
@uҚt΋zfwI?C#.ߩo/<ϳu?Fֽg! T*97!dmD 6a5 n?"㵻t͞`VǶ|"sF Nߓ5"ǒ5EgaZ :lʄ hv qmtxuu+%m 4Bf.c 9dj)ʱ~XMeac_У.:6N:=&a&n2RHmKi8+˦ =C5:*SڼStAxyx># BP( BP( BPV []TC= m`[~>Ⱥ/AEsֽi77V58P( BP( BP(,3Bf8!6^xI=S2 l3c>G1cB  5[g]ou¾( BP( BP( Ov=xnzG OPX_~!iw/kW܁T#AT|o]>i, BP( BP( B!p!(Sw%ldޱ[8\u[B"Ze28CzEo{{,z;Tr69XJܞ+yG{Y! ʶ̋T;KCʸ(iQ)@[fGPqh]Jʔ[I'XK~)e]k:W?2{"s.icwFL~:wL Va!R^ yɐo=զ|4~Wf$8OFZh/j#P`uԎ;0!0,4pRH1YQӰR850!0@fF\BMmA&~22U4cۨO9P}rXfFtxr 6_!O }&3lqFtxr 6_2 uc֨mo@'ל`cHefA\^@Qa?l`mo@'ל`c(6l (6067ÓkN6067ÓkN`.1l`mo@'ל`I^`RfFc)2-T. C{p ac%mo.zle80N`Jy pQ.;l B{(Ll`v`*aC p` `URȴ"A86 q n?t"A86 ZJ~_&p 6ӑ2zNarBv枣:]z=sC}l'Cme˄SlX[5V:L>u{i%qgm` ʪoy*CqZu:]}Cչ08չ+zsK$8KlHANZ,6Mjl}MrjFyju6nm~.{?Scx3Qg~QoZ7pm^M,plApR-~SOuQl+G)l.9eQO0f_0֫pwku>!Mtl'qlU5bזZkWXcvԼAgz ?s E -p?~iXE։deQ-szsS~?mqϹ~5X~?~U5Yue.~;oo t5žl`PV/=t@{[];t:˫`cQ̻׳'*:tpet;1lش 2a.; qrUX$E_T9wDU>X'/}hxǿry}1s'ݲ_%${lƝ9Ń'/:6nO˦γN@cuH}nQM?m~kQ>E7>wtwYeC]Gu~ZoC8?~uR#w5)1F.͡})m=cAnչZ4cu>Tw}ꠞ[seาH|ҪcR>"i^*v/ͧ p`fN'`AEUk0L,qi;ܩvl66ksgxӻr h~?9ی;'O1ۦ۸w'3':r;۸-3\A6cHSo]4ZL~K.U`:`86mB㴇~^O2&:p+p:\'CkWtxwap|Yv}}I"px4Vɹ8TmU\mSUͲMqim~?#j٦70M-;Ԧ~k|6۸omǐsx~@vqNt pt9Yoٳgϟ?}ŋT/LBzKǾkR ؝ 3k: >+ SHWԟ{e85m%)3m5й<YTp@޽{wÇ&޿իUhSU:8Div8o߯I^>%  ~ǣ"Pwy(ub&p30>t4 )$!?/rۇ?w!(.I#Q:: i#O7LdZS?4%|u_|qիۡQgϞ׊9P;2;cP \PJc˄Xv\v8XHx~ 0M*00  TԇPsQSz\޽ E$@Sa".H3dev"H "%2p;җ ̀S ,/:>?੘ 0M*00 ߬&hpCaՁ:B"T8Bo+靶[_c,#=ԁN!S%hyH^ܟm]eԱ8&ptLt n@C# #tԺ7=LldvgʨK8P`=W4){,ؤ:h??-}`n5+uZS!4zy.=6z?ϗ6 tmԾs&|;qv2Ay@񍊵HXrN uxvֱT־`~}I\8`;|eͦn% ڸ`@F:H:)eohYv!c |&wQ˔z+:p0wĵ>3,&qrMiK~fJuvX%hC^ou~ ~gϞ9;/_[cPw۫w9^S$~K+5TQ?]xxx^\+tQ_T<v~EYyg}gkOew|Ur?ڂb|z(zF}rMis )kwj8/ɒMCϟ;!B޽{: 0kL[lZSKn>C\uاNOF  $ i T@` '@hZ$K%vhJQ\^xhF-Ph^r}40N'8u;EvC+ݩU˨ekLaٿTciOmYԖUۋeYWQG~r?e9PG>o>Ų͗u=OnYU`:b~~}c_M Ё/7ή NϞ=s}95O L ppZ/kOlhz[1.{YN-{q?es;M*ȱկS[ ue6u=E<06|*0KluF0H tw|Vز[Cw 2u=hH7#}}Lf/X2Bude)ulsYV\b$q*) N:3B pޓkRŰNֶ˪umj.4~ϹV^lSCNx;MU76\:2wfs֑mNN=M6ˎxҼ~=~m2}'ˎpZUO,{iM6˞@`iN%c{ha]}._@M='* \Xc85 /8RN  ӯ~J7=+ɻ{VKߺxO .sԔ1nݠ)5%@N: H085 /@; )]XQ>hZdmysk7'WNUrM2TՏ6Ru.s׺zF3;w2Йt0IM }m}={x(xrMhnZSڱG8Xֿ8F8 xSfB׏;;U'@ 
)sU8[}hi-0;-@nPi6v{Jck] * Pûw: jybN MOݢ@ F40PܬFƟ( 5A%;rpb{5}Asshޮ >Mŋ'˗_SMpiT yN h :u4CXF6 =4 ̬z$45L!>=t ]vk* Ё #4 `Zj>1Jz霪\ g&:W'pN4e.5u^NHA<R0H1aN2RNi:P# x3lF;H}{?:`_Q#pN!&AO`]vvS#|z1vHb%*Fp:xo,tR?E 4^Su՝Ϻw0߇oJPp 4:)<_Q>#A8B"S*}z}4npEVp.zt~lYsJ򆬟I6ЃjʂSc&*3 gϞ_H)0$;,!pN!*L_YXWNPuգ)js_v(&taLoOIMUF5@6xI˗Q9\;H&@Qq @i^~fWӡ:;$w$#8aB *$rۻF4У0ZZLw:U`@ \,z<5C3"׵nZOqZ:A|B޽;:uc^;(?˧P%Ú z pKu`'_ߔ*x \ߔ75k}@Lq(@ۺ z-) 51(Ϟ==' bMp߿r.p`/ؤpy.T@&pҍ2p:~H^o_~9:z@)Ogr?Z_g8HxtWGzNyցSz;U`;[e|}gk|}4CGwHXUd_HZhpf`g:ѯ#(K:N 0P/ ߘC%lF3zp˗/X ;-T~~#W|媃u+a ;hOaY>q}6̱V=$o{ 12}h2Y N NX0wqp,TXVЋo-+RNw-]N56pÇux":u/tpuuuةuCh<{Uu~r]ԯ+Nu[zG W|{;Wp:p-kNUSU/w٦M`X}5ʀ/,P`O٣W:[qz^m> =aesIfWȉF 'ഐx#nyF_;"#1:)`L}:IjTqLqgRdB k㦓<$G5[ʈ 뽶D{#U̬k[e z_czNRg%{qTIcv4$~wi빙(mE>Q{Uv E~|%.V:Ϩ;h+GAaoJ,3q.c\Яso_'j9a\ԯ[x;n]⮒]0t3y|Y5 VׁuM`XL$>:9ccj}pϰ/4 ;FR>N!+$?dsbsow+-eHzn9Hm:P g}MuaOQ!}!]wl{;,wrMUY'3{996].|W0D[$q򣄇[<F )v*3J LјC펬}i|1z/\CVHzȠvas~)avZksNCm0 |P|} :E~bs"m;y:\Q:0ks9O2}&|'Ǖ4O@.ߴ/rE9zװ1;g>-VeC jlPݝ*X  b]]]3B2}梃HFQo\K]B mmM޳yU;t]oPCr lM`W~rM .GIm kJNxd%ԭQ e6S#Ǩ@/u>>|rZ_@vv$CR?Bvo$~Zk  ;ɟA;AḾfKy8kg]N\Kpݘg?&pn 2t[UмЧ>p|UuGd~)#llwS t68{8FR؍ԡrB9{IMNi,@>]kFi4fRbE N t3MpOi罶.mX S1C]L)wԷƤE]*:_F7s8xPQt`:w˗}S5*:nb4XMtm]OwMADŽcXmAyRxthVN} 6_y!TwZ{qL 9m ';&p'B kx:Q_xNw(6Lz[!e?Upg9szmGl+S5P{ R ? NE/tݻyktn6 xݑj^Sn$>B}s~d.ۯf\[amw/6GB}y/1zsx|[W3Ueμ~um;gW,;!p`0'\]d4 ԩy.T{{N;!}r8vcֳkU)o^ tjVSN  $^zZMow4o"MHzDZ=@Xcd}r:7Gq)=7\/{*~ UX-S&Vz6q- (N~79Ne!EsrMwpw'>pJkpNeuݐg7$-<t!myÝs{tŋ]Mc5*Ps=%|F QW'QI*rA n4y6(QvB8(Fq"L͟~ŏ_C]wU?>6é$?zފyt_J=w3;Đ;Z w 7=`yc~_GθI}{}cYz!P,$d9HZ=e\$F, Sڈq>vyG˴Jhx~~$3G"p{ iohǍUjt476#{𶼹\B{裩! bΞJa! 
iNc| iUN 90tE5婙= i?Y6?u5'4=u u蚡q޾:6#{>/FҦ} +E^҇/׬p ^`4vvc^\J{裡]H=ZPS@mΡvrMH~,q1Xsu:tN7םUw Z쟡NcBw.1co J*Q0Khd )uoӀs |X VH9MrCsS o*Ig>\j$SwNg:z̾{\>6}E^H/uWe~LJE`p9BS*䜇Ŧrp?7OaB࠙ޖ7uiIo˗cBj*!l0kzU?/̡cw噡|\b|5|FCg>v[ܗzk |O@cy@JL<̮=uV裞XC޿}z}5wC 65 t`Ooo\]}JxQ?FDTGOy\&رp@s$Д0Quϝz)7;񏘣4NiF90@oʛV.P>~LÇ}/hd5p"o)b jx:u\#)"AXN8g3Х;o/^ *ٳP@O0g75<p" @ 5O|Cp'ݨ*|F>PA2xy,dA>M\'B΢XP5ۧGBj{h!iM{w\#!pzLW^=_bFOH|/5}Šk5\.ݱ.%c7f͖CF<8AaD ;&p.8 SD{'jD6:e@]- .tsefPW^ #tut@|pnCj5e! ?LkR7F;yz}x[~B>5*Gq z^)2X &85'` *yb`HQmh>~;):6pZ d@ `)KZ@0Sj: xJؠs)Mס@։ 6ߜDzd u՞@| :-N` N%/t/\ P)b9f FySlԴ qKTFp2Ji[cύu1_'g}is]uha<\))}klSoYp0ړkNnHm2'!!|l娛D!p Cv  4Yleb}T8S_'.SZ <XfڳήkUv6vR Ysr[qmE\s w ^څ7w!}0\ 5O| w%9G}z]rnr0; s{ϝGkþ>ZlC`=kTX`f:o%}@"pb@?>p/j[G!:>>:| 6V[@ms6{fP¼"=FֹOy^a= W,#>+ 2K\gyr 6 Q{L 0h,Lo͚-)C2fw`/ն `y\`ZS>vay=cʰ RXD`ӱ^v}0^@6m耐̟S P.; e>ڵ\ШzL!\WP\Ugϼ1A; 1B'y'ל`Q'{ 6( |g@ ] h5()W^ 7 tP))f*Pu1ىoM;u2p=q<2Bg/>[(~PY h|r 60|Uυn}؀z@WcfPo^ޖ7?"\CCpK/pkq0w}!Q k.3i88ַId\Sq15V l=u4S!iXX=L<:,9-2۾7ONRm.ꕤLNxc"wENlۼ ] d&Ve:H"YV#|jW[5*^=YcԮ>_J&xb*M\rtեnV'ל`Ә=LEwav 2Ր0Jxt o^ &kNi 2DFVœP%i:t(|0u7E{(5'..XI^ĒM 0|Ҥ#T~ybhz8 68 8ܲ9$#t Zg\^?׋an pr 60-:c{ڰـ;Q~Q^$1+vmmys+@'ל`$pʊM7`gg^9x>HG) pD 2ih=\S( BP( BP(ʔ P( BP( BPZg_-EBP( BP( 2q}?e{8`0V\o،u130 u'î>cA=[cK_5K)#@6u|-Nz=RvCj:OhQuX2ӊpj0d\ 60JKw'eb \څ ~7r1T%KUe.KyPL^Ga<0 LP'~[y|Wmڳ^uu춍y=Ծl7ڟsM7IXY<ߺ:lX@{kz}r ,}i_B\@51Xp%DIt)8BwNߔO?k\_gM ,k:jయ:|^E6]dEm/: qO30҆w`/;'̬K0+fI~B.ux1̌ ԣt7,$l w~~ܐ[B>6yt3ھsX?oU?medrg^7Z;k v~nY/_'A9ael{ԃ6&m#6s ̥Fu(@GAU1XGo88(B!A+6]A_mf6u%iau|5Ba=uwu#Ol68($Ҳ68 Cm|9,>t was Qo٨i^@0>n(`qUewPv 4:;͑\vO^w3wǮevҋ< 8\;-#߁ÑҤ{'" ? p?3{V HcD0<0T'^G-BDʔk1 =^rulP繋9&^,)"pOl`9ϕ]6 5 -['n;{=? 
Z 8R+ek߱3  gF\ pLP(*d̐@Jb(pYmv w|X`]Cǵ2~+IoYwz_ i6ƲA&803\3vTJ4Pl:1BPdځ9pr̡ء~P1Gb#/ ߭2Muw1v]BIU!-1aVBZ/sG8\di'.}R#9p!p( \Potq=xIv +c0z7c76;{DvP`6IS}]qr-R[+;;u_`]r~q{~6cl>-onN)Eo-e쎪&wp!pгw [!5'Ҥy% __*pѷkz_$}͸N8.jE;?zUǘeq(Ll0%N !#8p!ppºw Rop6>|}]xB?~}MyS͵>wk;7g6sXtЁsp LOy,L 8-GN!s8` L Sw5M옣sj3^zuWr:(ԋтx팆KCk$޸_~}_|<{aS08PAQ>M@xz{51CsugƵV.>uΨ;zk׬;:st`6S/>A#0+qOݲ8a[140heF:xǛ1GtNF1uNsw]RG*W%5trCT cÑ.H?7ojZ<0 !~ɰC,/;eԶ(*)Jy8lt1nƠ~s?N9$эMK607Bsl~o.d`Mx@7RC0kqg@Rhc}绿:?#ҩ*S}zOjtY)o6o.$B_׍z܏\3)#ף u& 8+)7lao$1ppina!;"!YDu~!\sZm *@ZV]TPA`TT;6Ƹwd1tCPE8P;$cя;w3 5z*JF$t % 4;T "ppSӣ!kwYqXϡG1P,޽kt0Gaj:qCli-=|j|SOq0Ơ7cxEì]Hd]Z59(PД Bkk9pU-8(pcHPX.:mb0,H N턑48H2櫓TǒTǓ#K?vzwsA&RPm>dS x#=OJ_=l֫v0p~&%Fh8ϵôeS8=:yV{fO[nS1\_g鑋bׁ P S5bPGv=hhKQН+67s ;%F5;'`ֻf\NwNsF/g?Ns=bQhLp?qMCLУ鑉Ts=dcDF1בA}^z=2@` \Gh9pRt0)UԿ#KN0<(z 2ڱ)wU+s8eQ`ws4tiםl\̎ByުœVG p0AjqM_d t۞Qw5q\W\p}fg\G"=AJquduLʜxrb`6h:8BhLFWiRGUp=Ǭ]%A=_6 tjal7)>?s kՑ 7`c/hhfl1.8),sNOW9~#7$~<1EuӃ7iFPʮ]u73i8Pz)S0 mS㒩#=TF!p0AlĞTk3 I p;^Pyhw_GKBk4jI\LzɦF]WN|9ct'CB\S38FOҤBSBiN5J:u(6t@52AYTנe3nґE B #~p`ޡEBP!pw:$٢à!xf! ]8l@^m hYN;gF̩>w"ӈp1f3 mnĔfH zJd jYwVP(e.pNrn0E9sCяCZm莼qݙan8`2 ;6mPn0]vSikXW7p tiz굀;opХ9U~" 8p\p@ybg|n~CA\f;u|@܌7 / E?ox!,S ڟNfdH8vc7pV<72nƁL3 ^:ps77pdΜf>(;&`n 7 p6յ[2=%Iv❡t=|ܫ#k+ ܌7:I8`fĜ6 &Qq8H8%[iPn0G* 7{~{ nƜ)am ྟ$jOnƁL-5+I8x׻ 9$<ÁU6]w$܃}3nƁL V؝ox!IlO܀$w|@}{8p+(|LKúx='-$ ^Ɔ}܌7btHp3`jǦgCk@\&;qS{vv! k_]CqsH8xB3u :f0h }nd(q܌7GgKrS7pZԄ,`8 R8f7 47qsH8xb[>1a8pP98H8译9-C%p3qSTo5 9$<1N{t\A*+ }T{܌@^ >7ps%4f8 8p鼿<.[wz=(uY1hi?S8H89$a(q7fbw}=k3g8IWSZ2|@(ԸRp)oaMͩL5~{ 2TG9r|@_$7n0e|J!h( ^K{p#nNIP&={ ]JhPgc@CtaWHЅD؊r @%صEut}^R] I ,)P!,e(Rssq鵫 `Vd)x0f(Ba( 굉glN3:Ih =~.n<5~6)`Đ<-⊡ ٠$AY!~ >+PR <.ae )=Qu ?HJܘ-Y0OcfA QK57;2f<9Xk@Ԕ襍9R( @Ȝb);]S;{p6R* =e׫ +sCB)6J@:рq/v# υD}K YB?l @؜ VK n{Z|] m^O>l剝! {$=ՍM8р/'^2O0RgS>J] &Bp꛷`p'̄N#C㘫׬q:`2g8fj S#s 6'-M# qS k0rNDdnfgew((lܰ:qW C&@''E<5 {;~g~߂U0ej%d۶E_v鵋 t݋*Ml9f~F}y$ M dޛ UjrKDCg1\|f{\F'LRa%7cWDoH8x^6`Uhc(O| s5T;vM5#SeL= ϫx`;L.5Yn{GL2|`v}ۮ6xo=b|@i%c ukhh|Yܙn(͂xJ5lPxPzV8]ݐoYˤwl29Z0Oiix|3CfWnjKG'JU7G[C3A&s薆9}&Psi? 
OfȔxJc:kG1, Ж} 2%,:M83[I*t(KU$hp;{gbI:3l)߆k H&R°0,m'n&x^fז$ >gEIw^{2c8^2T7hW^?[e'OxD ^jpk0 6߹_*wa0gsl,yjЛ)$ ڍ؍. ð4sپzۛ.!a^XJ% IÇP> Oy ;ؾ1JcB9 cQ!fxnS:7j.:uS^UI9'@?` s R״)CI3+?:b0$0$ -d(<V;s5gVʂ0:ٟ$ -M.n7rKgVPc #CD0\kLժ5}%lB$K ̆\6`A)\낡tժg2zsY߽ɐl#=[lX1Sq'|-'`cy< Įa( B͞|x1 <l,7s k~s GN.-y,{ǡ , CRByT @  s0ᱯC`2r_'\2{ӥsI6J]F4JWc9k5g @ cgn &Ul9>-Y0Vu8@&ؙ0tE?/kK >lc8p04|'7>-Zn?7{KS ^Oq* 9Cdol^(kT3HsZ{նܬT+su ՇOa>p zk}VSñ1̤02erRwKnVirϰT`8gEpp0DsCe ג  >jyr9qզMvG1{gInXY;!8Ȑp0D@Kw+N0avY'}9|ft?S9p-@pìy{P- ?_T#M>x~>U_s=K0\цr & pd+_0X%/5S>/.MJx\k=c*t0lJp#&-eH8a T ?kA7&s,Pl%& ltt/AeX B+禞xZ7gc  M83{=? e;p%ŀ_5q{a]`lN T@B"\˗A^0$@'ly%x~w. ,g^.Ey`N.ScE-@d؛$s)qN2OÂ.YWyakldWKܒltkQm xx)iV3fշ!7tpgB=?bEYz :Dp]6`P - 7 "KL:hS@{-) l\G5q>/e$HM1-w &@Td(20ryvluIrf\?e@!Y. XWBwϋ?}q[Sy{SO4*A݊}'j?f(67rL|3`$76WN8˷S0_oy= h L4ؘcn=}`{v hiiDgeH6nuZח_Go?7: xp8Bo_@ЭvHxADwSO4H<_m&9BZĀ<$oSmWV5(q<%V0#>9't[XǷ?Z?|% kkNU ,_۳A$٠De t7i{,AzH/@A's3J-T}&bDdYٙť /뉮l +$$KGr tGaH8@>h?V6vRႡlM8&[@r+{w%@ϋr%_R>{tY'mҷ`*h-lw?Zb[kÒǻ}/Sywry1g&Ym{*46e>}.}Lq{NlY>eiVxND;3D~aeЍ9%s*9Kp_$zgmqty=% s jk1`[w ^tFhP K'uHs\=Tطx;9\sGrϳ ;*A򹣂nL7XU_2pB/ Aq)2p`P҃v޶$,_zzH0?A6 eł1} ?/JΈ7wUKipVduA \<%.}eHs[ji7Q=[#聛V6@f QaN nwAepeZW'HP:}nnpPokH۱V \r}.* zى䖨V'rz??'b;N*%nF>t,o2eX0D,4`g ml fJ8HLq<A%NZ_A%'tyT^/e&R =CW gQ@km6xbwBA3tH8c芛t[ QR)/I8aۘSlRad~o_FɃBmc.){6`;; w%/ t}nܝٟp )' G31<@%idx9UdI'p KLyN+s^y|–6ѧca7Oyo_W_~cG $7] SO"(Ll)t[ԞT^8[qF {vJYYZa rȚ XwK7q"T`XZ K]- Aо;W'Ԓ I$:@VAh 6 4%!BHh= oN::~7@Jf_~KlTKn mhgY]E~ ~,<\vر|=蟻6 ]ouuÿeþU jOÿ\o3Sz@/wkp૚P&<|[u:έ.waÎ{7M8Tp6Ʃ|6q; w"c7dK>T L8pvA'|/x H:ST.&A}V2BQl\R+.lMiy'̸4@T}s}t,knP;T] lѯ@Z}F4d LI;$ۆ^[&|~ c1ͱ-@]>;\9gXܥrџ: Vk, cw7듛:(Y=u $]e4+n؍s(:~`MpgwT^ } gԫvmH@}/om.+Tҫ򳜙 {ҷ~1tp?.[p]hvO wjv/A::?=6;ֱ~U8 R ksn\;UhF]'tŧ ot‛x D*V8'ԏS*yH8NJ1{`/.쟻CRn`|nnt_2U$^9UԗeS%N*_6 _ԗ UXSGqˊ yR>ڪϑi0q=L~8>xT&N}8Ulbע]J O?}Uxݹa+./RVݲ.TG.mR8{C|dnWoN%/biz߶lEwsy"߱e9 ghko(#VzR M (gY۟KjUy9[KD _}RuL x{&8ilΨp.aد9T:t.%wZ,9K6LWH-g>[Q&Lz)2Xmt ziwG?=nߌ~x;`[((sY-3>SϽ΍e* I ;]y#V<ׂ 6E61ߤP]'n]??z`PgOkzVop AH>׹ e`ܭ\ /qnoAhf-7Q"V!gڸ uUB͹w/Y#ۮ\F; 
ۭ˷F飜 ɢ$u܀:|Ρ/Vs%MW}vB~慧 ʸ5T&_ݔain&h" eu1`g2`K'He୔.P77 "9j <݄_56mO/5+8o_+,Ӓp Mn~)( %ڞ{9C)c `I}}tigXNvmq>6ˌg.=TF똻kAC{pp& n@Aw9uNn[Rm')9&+@G$A@W+h<?yccژWlm}'> sd{K| %WLT$)l3Ւ ~IVq5hpAT0`$EcI_ei*sH͖(Ѡ|ۅަ@wpPV{12˩*!Kc}dɗ=?6_4`e7Sp C doٯ$([8c6,Ar"gsSO,ܚ*AAqRCA*`. % 9#eY* S&H"Hp^we)W8le T/m&7 ߡdC9ӿYae pP?~T{wQƧ{Ϙ59LPOXSgn^Z`AV*x:[@ Ċq4`dc^l ׭nt1_e%(~Nu=u\wߋdz>.ĘFUU@?%ctع]n`%$. ܄lZݳL=B'!<tbTKa|xZ8O.n&,8'|, !@?gQՠ D+_#Be`HHO}:IYr@D-unENd?+ b^^!@H,X}D[E]"}%$*|Yk]pl7nR@b;=/Lw2}`0& b3SFUC8761rαf"XL9 A^-Ao; ʗۗL. @A -4"?{]!xtԶh3jB[a߲:oKԻIO; bWgd)@'-^g!}QI8h޿`v% SY1_[@ &dݥ!mx-J//x1LtBuhXR!cFMTTU#X_徂M>(+S]#*Lw*,mnՄ.oK[K/ 93Z1γE;k7#u~;pЦ&C쵫[G71I0]o֗"6j^1;~7f}M8~ϫWuoOǨ~tpXBp,]^+=v*WY~נ~kN˄|ض7 uǝ^B3˛C ;mv3%cQwƇ;#|íl0rJ.c7DfL޴J%f%નj0@:-9 \_v~h%Yw;Lcr}$O8EM|y2;&0Dl7 ZT=pm}&MyEn78HVK&gd^K`Hd!/l rt׏tn^B/7jܲ7N7\evO3N~f7T{؍}8]kԓṾ+_ǎs U J $^nM6($ ` U± mK3߱w|@;ł?+ȹ~g]hs.R T_K{n}./>E @~zκ~[ߺl|_H;rjNg=6&#B=,pYW:ɛR;O_N,UQ[4cY $QnҾJ# >L7prQͤPAcS2n\>ic*_NV<.fK5{ Ezs@U }0 6HX.A 2 ֧ڒ ej)8K 4nՏv|%jK:d3ZU5tZq0U $Hp'uN8(L*m;>* cRV;ۗ ⛻\jHxv9|xIo.T(U Բ n۾dV">~+ mL/ ^_QhA$]d $>-&sNP;pP)D@ O/v:9qbP&nC~Ϻ3T5e3H( @I ^sgKgy\%}K8p[]"p*5.k+4fFU` ~v,}PS@է^ǾW6 ǂe;jв~j[/K$p Ky/"7@[_Nm?_^z'~mV/7uN.!նUιUMWJyeF{#ՎT̫scu 6Wl`9_x u<{ӌ"yƪȼF3w;n 2u^cs<Ϝk3:㹽PGJ)\R*E` $Q`@`}$6 A,}?]:?|I7+}hK38ɀo}9Ηx Gι:;{a鱳dK%rSߕu/`hx|Bwn=CTMnBn}M.^va3%^>Fnojgxf/=^ "2~:aqMy/rmWzl[yaU'1Fh|' | l6}mn7$H˷]E֨j00,>^fڴӺqCKmL^$\5$ɣч@$ƭMA*&\,8t 0H$ :pKk y(tC(Vz[s7r&LZo~ Xn#27 8ήm;־J8gMt2{FRvK]o, Kd }ɗtP.pއk/gWe)z/Y=_m@*w#m_w7ɡVn],Hoם\"L"Y{hÝC?>{L>-&vmU w܄;rv,t!\U-%(vkN -$.:p~nPRqX(_Snߕg?f|hKo/7.PfT ijW%q zI?kbq3+?9t{nJ8x<֕ 洬>9\[λ} &]\mr[AyzYS%U H9ILs ~ X t?6=G s\YrA'x{ܙnE=߆;VT:汒VKanAX2!$&nEX~`43}RW&(LLe_vƦcJp8Bޱ&H9Q߱ؤ+3XnEkq22F]&c2rEk#ɫfA`3bYj0P@{ zuhE7pnu :Ab%y\ՖTHo$AH"'Y㸭" NIc<}U)nj-@t3<1(7&lP n=nagɻ {$bh+7@[UĂt_pO21eZ<'&?a\K*ľ@.oaܱ*"Ͳ#@ۗtjpؖRQJ@m9HoLN$86qA2 >DxrY_WL8U7& v]}CӉwFTziI% }_K*Jwi[I"5H{ϩsݜ7[Z cyf>Upu\ kx6^?Q`ʥfwF}fyy:ѡTm6pfI D&%3O%|/p+~1>M%YXAm=6}߰mԒm-,l>s8k֯cOmx,?w8{,=fdmyT;VwU걡㔕CmW/$g=:˘wF% & uT\ݛ[R3BmBΌXg܆% cބǯqoU 
zBD9&Y|yCI}>&\r\<{:mRBA޶툍+a!7e5z m5#YV2'3I-2z"ϩ]4z?-_U * BAiľo8 BN_A.ա gN8h{MH8'n;Dop^|3݄_sM68/1'J$FTA޻/V8i)r,p[ᄃFkT>p+Y | S`fA*@N^=N2iqqc.p0޲ܚV&| QǜqMp`kK ]ؿ9CU KI yڸpIPpfM=8.-)v=xphp*& K$4f;Mu{nTEX[TXFzcFJ?,C=Xk%AœpP\Ў/w6r@PI:P+Uje.__FΛp?; sh,mI8u]qzp1:v r"I8U|I K }W:!SyA~,#r__BHu JLc7'I" ࿛\7`n)cL?ZAyW~ 3k ssPAUd/'5z3IUSSx⯛(pnqߪ?>n% ۊm pD7hK %%>@j3U-HTEADb< `$ĂRay=k#5㿱}F?M T އ p}h݄Cx,;N$z]})@ R܀{2Pwk$K}i'ƽ #}}f\k&l< %&6N" U J#aAf KRAxJy BjK3$fchPkwD ̏$o: Q nbJ, 0X"Ac[zx[ɾcb@xj m<w#9ƿp.!;X8[n>ۭn%DŽ}{}=miyTrJS ׷+?kv+gsڸwD]wO&qM" U pzuˤZ ]+iiCy_B,!IIdBێ6K4Vb(ZKx҄g,Bc 6 .N8חphXT ޷ebun8[j~ބwoƻp0r3 ;}f(ޛxGѧXY^_1-2On"ܝ{FqC}7&, jr_7]F] ] Փ U껳@CCe{pREsm$X~QU6TTxIYdP{>pp~V /M8pEAq@7o!V:93͙[S1fu_ˢjj~j3̗}8O=c/:ެXs/K_UrFy(Q3˙v%9*}?2K#3j*]3*3Ir9}4+} Uǵ_D1_α}cW.dFn|Y]IXQǎ%_j _ֳm [۴M:x?4n[-k:73|@XB7 4g,Ms5qJ=D nL86rq~>rڳ;쌱U\U5ھ~\bT^WwS3'6|x//Yb|L4=_O'p]T5M =袍D _RѡsyJΐk%*zq -/HUY!U BcJ^4UK8X\aVYp~X#3:qOWIWaxֿIucG1L\~GU" ф Y^B/p)@N&}h6}%:|厗oj vSc _pmsm| ޗp!U,l9UU ^&A* d d#trn>KN&ƻHSM0=C lj`$ɠtpx [gb`npPkVp[m$~ PmK(Tj N^ \1/K8W98=+zF,1\ ~}I"/頶} -[&Ͼ{IձR) NrDP D w[cDT.(|u!2 &?1B KZ67-ߥe[j_w>2~fߡHp|Liכ?}^u]~<YctPQT5 wYBt 'v(XbJaNTfi~T5)V$#W bI fj*${- ~RQ}$k@^U \T,p+4d H;{:LQЛ]^dֶ XŐQZbY U i}]؍qG#\rq6tCf,Ӣ& pu\VftcHj8>~:=62Vsfl7$q-܄ۛ1$đp+!Ip[vY C'GU _uk{h=6?* ]rg K*1}f;3'{pu4&;[|yvR־E6Tj]\0=ϭ:WNx[<z.U?1pDR;}-cꙺUw^e<ymn?X</{= U Ai o Tþoq:8_$8 K힯o}8}M-IqyIE&E+46m'ڰn>.~](H{# ܆}dF&`wm?v /ZqlٷpwOȹ6ƞW[8¿e8Ğ83ti÷-x텙pT\wpPK6(g׫|/+N@߭&.|T8ppH8(Ԏ 9I_|InƒPI cyJ{Qo'  ;S>V/1@Tm` 0܄NNc9M=_%UՎ/q#ӯeslP兝?/_5#9­V[^762nUBAOkV?Yg9܀/(_}Uw&咽oD_|C3݀;[{mЌsƑ߇k9K4V_EGjӉWpVθmms-Qߣ4q}Bmy,3k[t/jt %"|_Bw?N Ĉ]tU2A#P,#XqPuc܏*DZ`,IDAT4eV,0g-`tvW:2:0tEf;LL:8MI,qɚmrpW%L.ѯEaN8oc~Ƶc 8YRUGH8x ^Gkp%J8%ԂDUpqSߨIx^IP/𺏎$Q|k{Fc˯V?tDzY_>,ms9FqOxN!7'H_k;k]S^sFW:"´K&-6X6`M8(:*nj%Ѹny RBBsk|5hS%$\Cc]uU +^6V*Q VŠM=Աr:XYq>.ݭ UH>韾v{{/~/}]q9=z K_?|<}y{^Ŏkn{1x 9(<sp/d97Ԗ֩MylۻƑaZc;_?>YLGDEՋYL0=GS*RTf6 i,KL~|_4_*J VP>8Y_ 3ȧ]>ℂ\A!ࢡ}ˆm *uP81m㳱ڲe ,g􃄃\ 싪~TAX㄃cA$. 
a郩rQT0Zv!>]Teא g:]JL4B>9ObR8F/^.I}Oן??gta<.BB- $'4xݼ@[)~FsۢI뾛yS_>^7/w;b5TֽwmAE^ cߖ;ry$a,W.?$lp_km 5pw˥x3}zxF|. 7fYO!(#wQp֖v ibPU`z.`Ԧ1+8ߟ3T׫2 S9ZR`pjtA:6C}LbF'Zts4f`|noZ!^2GP>¬H8H8$k-\O|,YiT}+?E|A1ād:4?]N!>FD$A7 i@.h/k$$1T@%0l$Wdܟ1d|\n%:5ഫ?-nnq@86AX mr Vx9=ؽOL8HC(-0w/I8[bI88.g£ j?Ȝ.+f1?뮵[^~iDN+6I":A:>w霣DAeJf_>Z5MjJ~rA`}a"Ī\BziY^bCҾ16 pg3ZF5`= {]9}H,'-L8q sft˱[t'}ϜgNq?oAUjݬ* ҥ^3 ^& qBBBx??^w_.OFEJO%2y\jO[HB4}mV{_?T:׫8?~=V4ke3~}iZCpP0Ǩ@p=VzXZ"Xt@b-p^ fowl[%Ծ.!Qr+oαhyr 3`NcsrppV_y|jwcd%HKL6 Oi@~N~&qARIaospQE/02 Ug-8~/w-s?*COHsgdzo,4ra)s O;cזr9Vzpmۻ즃{&\W%xT9൩j^`<>{>- G AIŁj6UJUO8~~޺FQ؇RO_/è BDOL7M-qm32VL8h200?yi[=!Wp L3NKtF"yJ .c:sZKgA{1ӧgj~.DxW[TsJ$kOV&$@?>ry~^;  ~6ncTfoQ@ćU䏪%/-V8\yy(ZECu{ m?6g X3ÐlPi['N4lz s>GAYfU5PK8F*Ll!?UqpΟYkDl.Մz6NlxI;Tz#~N F*ՄxɄmɅ̈́ʌjOA\a!Z7}}kACdKHgwUJ3/w Ǩs': AR&~>U=Y4wicۇ?rpmCDT%m ?k Ws=l<\zT5ђp,q& mKM% kU\׌^X2arɅn ߯%IZc̨ y 1fQ{ܱd6#`\cn9> K*l;|Li0ݿMt8; s?(]pi0&_4+_ni>EjR=,_}[6aIn2FnKZ۞>3{,vκpPZ!8/]/W: V~nY\V+WX;0z>.mc){ ~pmy>ñksϣa%<{ۍK~I8n\\pź}mtKyt݌~aܮsm'y.?3$wѕgoO3yY-ﭹ;n:- #m-6\uT8n &vdAp8[ fAQ9[f4mWB<`\ r -}^O8`DvIq£;Vf*k;6,00;)H+/W/x af~\5 @<[> R}}\F<';Tdޅ*&&iAiL  '-ĉ礒ý 3ysn\0,<CYr 9NMm@CnVzxM ._i: Sq2@90W QbD)WG%5 O3cIEmI>#j d1I*LUHf7%T}w`wx]rؼwW>̼O>>2N6o N֏!P y= o/? A+$ǩ-wٮS4*mO83NF">{A蛴2ӊ Ø+=\ ןgt UB;ԞQ I| xͤe˖V RM> o;Վ(.F?nTl^]Җ[z Oo!R3١o;j5o~;g]`"B&p~ 뤂U\p{f==V—tiսฤ a)P@o幤~T~?SYBtR<[۔\S=0'?_>0yqFnIsG2& ñFBDa|4&I8=K$P,@>}K ^}< IæCɗE^ޯ%GD0gi>J6!A-W}Zt%R K&w(חy-C ^qNY4m`բ iE*U-S,{$ӟ-qlZʘԮ9WsuYإ m}/i2YI;s%8[ I8%cyKP%$,~*A*FT+ LG} @!XOuDZަ Ӕp߫/ ^EU-9Ժkx8qpK8К~|ўJi4]8Tv@Ӣ',g=0]|i$œ{&tE_]al~g]>Idn=-M. gν8*WI$FN롿N6A5` PV/φKjY} *7 |~ߜɟ xLe@!yaN }S{7TA$x)\H.!" 
~C}y5'c=O_Ra;O%$1,~_\/BP#B G XDY 0rJ W/S}rܒ n3>"ӮUy{ 2ԢWuXDc|9߶}?\I~(%tqJYDMerK*\fx6ӱkx*ퟺs]uo=7ϕeN`}2F}|0La)}[SF-i`D<}3DՁT&k]q@|p}6hCZ)!w_ĉa<9NLh HSe fTi˻Ǯm|`=/^i8SIg?Tf*b&T,۾rLrU+*U/k4j%J3N TA/~Jf{llU>mY~d1csSzg*]׶ Npkn&LWn8^̰|3'<~^6+HS0 OC%?J)VUKzT-!=O,C^Hp~x M  .׍my4;䐛=%r}}׹EhBڦ9c_ZT"i=/=M8u&ٸZƵ|39w͎{_ ýwRszY"^/ ݵ-2fo,P ?߬vVИ&CzI*9KB&/ڬX|䂢iP& Ʈi/0TW龂?s8g:]506-gעJR 9w}YsJsj8āO4=wC񜥄s5mn v'̾6rljӠIw[\.Roc]pړZc gCmVr U7̜mH*wޱ&Ds}7KppsֶK磾LK9$c]J8m,ɶK*õ ܄EWnƁ놟v_ڵ-l0u TxلVՎ_[i-4`[+^Z"X}kfOi>q S},Z}y cVJڽajϩޡ 1+{@ !T@̌WAz?<[,0{ZTHBT 43QܷpPo%T\uʱU JAǤ]Ofd^胋deX196ױ c_i{}-czM'!j u{?NΘ9\& yI59քJri"BŒDpX!Tjf=709Fn`}qq7i8}@ qi  q?7zJo|1DҌ4О}M8(I]G/BrJַ^׵r'/:.q{8^h5Ym*MZ#ĔJ*!tf-?S1Ol_ ?4#FԒp^Y.5mŁ$mBBƨ~kDeژTȎc}ڒD.&x= _(/]=B5 C7f?& V>$8)\WtI 3utlL5M8ږHؿdj Yu'̱l쫴Dzͧ]=gY.9svw[m/t0\uX ]&ck_ G7\} _CChWh_<8&3},F@i0?^_X1\!Ni̞~9,$} qH@fy3觖3Hgkz6g|q9Kt*i}\vN9WmzʹWX80.zmx_ f5T*K_oKhY&aQ72x/=7q_vQqw5qͪṹ1B.&>m<˥8:awLCK8B:Eݞ4um{M8Œ$>暛pq h9ƐT.iDz[F 9;8n.7 ?o8Fiy*.@65K~N9y }?|֞Cm!Ik8{eCNc#]=ڲ4G_*p2/1xl}\R*ߓJpL2 ߓ T7Wƕ ҤP.ǗX؎?^(|<f~fSu NwnV!W8t|{YJ.—aOnPK6E3f}~Zp 'H7 %/-$#~!E`IENDB`megadepth-1.2.0/megadepth_logo.png000066400000000000000000000412601420302544700171400ustar00rootroot00000000000000PNG  IHDR3gL) 3d|2SAf >L) 3d#Pf{o6x\3t>a&4M{i:0l[%o5 kճr]gUoqCӉaozt:M//P@n3u}K-Akߠ" @|m~}/TMfy_].P/b~~~#}=ڒ$35 l6r%MI_GR4*[f*ȹHI+BmS___RhS̔w󔔮UI$¶)rR63%.n|d|MIo ̙)ᕜJak'p,g4MFV^3},fJ'SgR~HC҃Z<Hr6u>\[RJ@"]dp<N@/e-Kf>2bYw^2SBV6&>Ktn2|fGr9N4|"TE+A!˴3 G uJT3S"~rL;\' پ*;, @l*]riO #BRT[9+2SuyO9HfyI[̐|L oXNnh$ c! 
\~!ۋv *l8|&-N λ7[/rd`rJԕ|Rƴ+(;l{dвQ5WW|Pn|姝3,82Suɽ7nrtݒqYƸHa'ND#CiۚSIy) Z hg#eյ"Sdc,e.^ufT3VXƸ!3%-HcdeȶnNz9ELb(#Vc/k1?vzŀ\Ц6$\S:L܏Sx諸lN$JH&.A~jM&&Y 3e,B̒ź h[ےuN2S^H#!Zk h[q'VvUS&C%_>Jd,TCk1$eKڴͅ<$3%ɌWN%o.Cvm~m+) WoN.#HrJ.P[=4~ 3eA%D) vZ*І|L9(ihca+kӆvm[,W5vv&BrGMv;L̔"c=iefJ*me'3L գMG끚L 3c1Sth=Y~眄Pl6@xm C#aU>3@[6PL 3ִ)z=ܪdٶ h5[zŨ'92v/#r3&â)d"¯-9E֣McwmFW Q>"3emSLeM٭~2S&Z!Сha:x^S2yݻZ|.dLb"Of*mb?d ûnדiLth=6޺Idh\AH^͢BfZ+ CL2If*Z'!8zmup0$2kTu3JZEὑ&M&2Sh!3eI)"|)A)G8z;Z ^75ɫY\L9xQ+T&~L$3q}l YСh@/[ZMME*I(j Bj->2SoPd2&Cf@'h @/4t:meKK&HX7P{u5ɫY\L~ 5ސD)dFwk"—G8z٘46/Mn&}y5 )aMj%2Sha.eT(|~G8z؟ٍP+ܩpEὑKh?L2Cf*C˯G>:me{+l6sj@\AH^͢BfMmSO!362Szȏ|\uD'3th=^lI!Uy݌R $fs!3)*. 2Sh!3e̔~`j.G8zم4vG Tuu3v5,Z|.d<6% Vd2&Cf@8QYmߝ#rC&^v>a'vU^7Wh񹐙rd!pÔ 362Sg~x^ Agi+C&^iGظ#Vi^7cOTWh񹐙rl"<9ҐD)3S'^oT5_}nG8zx>Y!ǎ+fr!y5 )ˉ+v2Sh!3e wf*D:me/!#ώYoD\AH^͢Bf*mS2٤#2Sh!3e wf*ɯvFF8Сh@/;S"ώ,J\AH^͢Bf*/\'362Srg$P猊^#fmG8z{aWRU1(j Bj->2SQu_N'3si:\}8 Ӓd=x'Jvt:ISn-D~~r4Am)Tc:T8aȞg:2'r+nL*=U %j Bj->2SQH}sgʬwOb(hz%H4XԘ4~+wrC4C̥2wqRTcʚqK)GCo ."kkkRZHU%lIZEυT 2ܿQA T42=m_eJΤ%}ښlmbLZc<1 ٯ5=d\zw,\.2VTĜB`%מ\r^C=.zi+">,Z|.dAHj"~ KFw2qiggי|UqbVIi^7rlb^!{A2y˝\i0$f2S3iCh(kaBS^͢Bf*Bf*2qigo]'F 4rl'mib&0d!{qƲiYon%y-)Tk`raWh񹐙EbM@,oT+ ~\&.%g6_V#/_W m?Đ=sPc Sשy7&>3uV~5L}ʫY\Lc0Jhb#3^d\z9Xv\E2+)\&xwFOy5 p$T}8Tsm~?T)^~FhU+*-j4˔@.aZ Fp"3^8y]BJqp8ȅt WfJHKk=~?]!)fsj%3%dJW?CfQz\z^8_Wx~V&R~!C̥ce^ )넛&ȷ[kgܧEυTP2h衫}'t<o/6~Lp)3SmmTⅾDx^e־ 6d\z9XzdIucygf_E}ʫY\LU)޵Td4?T%e{e@\{CB 3^9Iil=>:qVϙ)!_!>,Z|.d*~z i#3uƠCfkƜͯЛSwxQܷÐ]C!{a2T#G>shL ׹ş rj->2Sq:kO̭$I.={Kzq_2ӎlPŃvb(UCJtd~sd!3u24ߕ@}ʫY\Lv*^dP)P"6~ O<0^ =pe2SO}Afj6ڲYfBS^͢Bf*0Bf »}*Ƈz4E{uȐ^zzpLݒd>Prj->2S}Of [;~uo%[2dcf7EFNK 2Sw.uGBٓaOy5 6￐j)Df o bVCwaK/Lyrz$u߷BS^͢Bf+Hx]ʞhЏIRW>k}RVQ K4:d\z9`faI'd~3 CxE}ʫY\L㉅ 'юXT@2h ),{GAſiE8q )Cg.-3{/k%#3B"!)fs!3Ն_>+Df*伋DWSLa#JUZaRjѼ]N*=sP)ϘlA}ʫY\L5۲|L!0 Z_4kw'Z3hmb~*ǐ=s8q}ǣ&/dlrj->2S A{q{2SXHV)Ȫ&)!6#dZk4BB!{2S ־W2S6Oy5 ugz'r4M PS\&.?ݢW~N~u]ߝK}Y!{˾"u׷)r qd EυTK̔=?X).&t%KD8/W* C̥qa*i#3eWh񹐙jGcZFf*YHW2˲LESLa`vLb6-)Wk%ޒŶV:!{&W?cmoʠ#Ņ]?rpK.̔)WkWh񹐙jLDd<ӏL5G۫).ڳ[7U2=m!{!m6ˎj$k 
3e1\Lϵno^TX^M!3?MӤ=,0<+KZWA+x;0d/&zQǟ 2S6C^ɅT{^/0"<-TX^M!3?/ݫ<~/j \'^C셴D/;j3Lf E{%2SM~.ѹ2Sai{5&H+KZoo!{!m6ˎ8dJJ.dZtBf e=V }wj쥡/u%3e1\L0_@f*,mkTjАJ"5>i= a^H۫M^'3e1\L5n/^ K۫)dB /vF ^nurR~셴D/{iK3Lf E{%2S ?bB^M!3\./)L+KZ| e!{!m6. W)܌hBfm8^?]`Af*,m 'C[zyEx)WkolS0d/&zE[kLf E{%2Smk)岊TX^M!3ߌևV8/j ˆD셴D/kkÔ 3e1\L5OYRTX^MɑI[#xEx)Wk2S.D/k8B2S6C^ɅT6?b i{5%GfJNV)EkߓP3^>Zkk<݋Wec-EHf7c(+B\djJ̔)frJהISj\A jlICJ.d)3%b&rMIyHͽWr18NLjl)ZZ̔ nPWr!3LՔd){^SZv|7wz^mGf7^mʹ)܌hBf q K۫)2S"ڷh1 V݉ͦŷ6R YjU(!{!m66γJ.d23%B%Np4M\xEx)WkK0i%J`^H۫Mn 2S6C^%zTsLՔ)gjVḁZ~xEx)Wkh{^ 3e1\r~L5TX^MI2"]im~qNӵ5z\A dz&zi)Af7c(+TOj;3u>#|U֦ΏZ^^@f7^mɑdlp3KO搙 K۫)3Sw͕V^砋hyz"|&3m6˕ #-%Lf E{%)2S!3WSgoCh=“{xEx)Wkh{^.N)܌hS=Ef9djJ)wEBۅt_'RJj^^@f7^m˒4MڸYJ.9?SdCf*,mi֜UE80nm[% ՅWr1 i{^.%VKL#3_+TOj2q_-Vk?f8Wr1N%0d/&zy=2$dlp3KO搙 K۫)dcCk[vrϗN'J^^^@f7^mH"3e1\r~L5TX^M'3xh gqB7|++/j b֚\+QCB^m?#S7\J.dOfJx;->g o抖@)+GkX:CB^m߲c'_j1#3e1\r~L5TX^M*3%S-->~8Nϳ"5ѲCB^mldX9N%fdlp3 )e i{5̔ }Z|T9ۑ. /j Mr&zn7 t>J.dUfJZeGu8^dǣ3R H8+{1d/&zY|XFyq$̔ nPWr!3LՔ2S.Oˎ*z s&M["5w.T6$2d/&^@-wʣmY0="N|pŴ5 Iip\L!.O]H۫).)T7?H)/Zg"oxC-/NN^] i{ɥ\&xdK{%2STX^MqvֵϫedQ+ ozi^^xs?e- i{ɥL̔ Bn(+B\d\-EʽՂ~XESzs]ðN&u mIZo0hm:!{&W?!i^--mfR $fs"32S.d <wVѦ2e%{輢Эn"QkeO1CE!{dBf!)fs!3L0 OW2M!3e@ˎ'b^MOGxWV[ɆK/j )ܧE%zTsLEp\5KӲ ~Oo> *g.Lf-dlrj->2S>s#+;HoSze!#vV/AFpa!=Gi6~$g.Lf-dlrj->2S>s_; 럛rh li M+p8h}wzů(ךg.Lf-dlrj->2ST@4~8xm#q66bFx~x;tۡ˵;d\zT[L >,Z|.d~[a 3->Xxd{+𯿸$%g.Lf-dlrj->2SuB\VwdpKB~&.i ˆsy|w[gw }jlcȎl=se2Sm!3eWh񹐙K M^&_b.uКL\Z0}޴Lѩǟ 옊 3^&32S6Oy5 ߲p$s!k'װ, jj%bEV +H(x-K4lR!;RC̥L̔ BS^͢Bf.)wX,LU<:ZH|5첽qS*!"=/uU)8d\zT[L >,Z|.d꺆a >0'3UUbeibװ;ZIp6m\?>ZZJh͊bȎ=sN)Oy5 aJ᠓O̍@Twf#SSkQJ}^P0N"|TwB#4d\zT[L >,Z|.dÖKmLd s\VǷ__fjURǣlzw[Wt0d:d\zG@m'xArj->2SaZN>1|>\QkktGZoMoSi~JѴ^fOiE1d{1g.Lf-dlrj->2Su݆=^*x̭J>-&8_$O8ھJeO^Z!۞ِ=sUuHqG}ʫY\Lu\*}b_gV~׬p:yk1@³FsU3^ţ,Rrj->2Su݆-6B&"Zu2SE%ǠZ-W^wq*e&_?[aȮqȞ2A}ʫY\Lu"QTU+/2SUD.w]YӚj(6n\hNhʆ; ٕDg.,_G :Y#>,Z|.d [2_w: ܂j?S٧÷tUR/QQ{O0dmȞ2A}ʫY\L~#VLU<.$KV rΊ?}Ms aD'>d>ҶnPMRwo2d|ȞrӁCi&xrj->2Su=Ssaz|I'E=u^+R0"װܛWZK?ůCܩ4,췴5d\zYZIG 
:Y#>,Z|.dz?iEotb&Sֻ]/nVՉ\ZT\!j_*^*j :dh1x@K{P>&QHZEυT]OÖKM{:w w9h[;s7CljJFQcȞә0d+V0y5 TO+zTw2S=X+OCdMthj.Lf-dlxSfs!3Uoa+ߦd`Dfʀ>dtoS* 3^&32S6Bn Wh񹐙뷰u鷛lt2S=p1CqDx36K:ԝ<ͥL̔ +j Bj->2Su[Q/N!3d h[εw8ubf!A˺3T[L 6ǼEυT]/t 萙Lж6 η.olC}%Gf-dlx\)WkWh񹐙Eؒ9h͞!3U2mmϽxކ*eJK/j )^!lLWh񹐙u:s; Ls~r F=ej4orCl.zJh:Y R $fs!3Uu>x쫓4Q/ h[sG8j7o`&mPG6K/j )^!W,Z|.d3l菺)Te0 t{mm(}]ikjNl`Ȟz,Ry\)WkWh񹐙ϰ5?NWpQ.po v;-N,ȬDJ">+:ԅ2r\AH^͢Bf%aU%$3zl.poDa}'LN7&*H~ua.l1c )3?y5 ?d+u3{r Fڊ-46~w|_qt= S¥L̔ +j Bj->2Su- [&L5mkۅ"ʶ)"|=.+:Ԟِ=se5:Y .r^͢BfaK-}; Lsm[.poMH|RDf/ m!Wr8mѡ,K//#N)B5ɫY\Lյ0lMӤI)԰MRLж/­U/5#CY3^n"a"ŝWȕr!y5 -I7Igk\k7ţE [O5jxȞ2rG%EυT]Öb}BqO_Lж[#"wuB_8NC-l.?V'w^!W,Z|.dz+lm6g&ު['Ai.pok\.ơ8]Ex/)"|Y 3:Ԍ׆)dBfʆWȕr!y5 "gzIgk~5m*ڷGKEHDҡfLxE;+j Bj->2Su}Ef  d h[W*ߕDf4zP+p^&32S6B5ɫY\LnAee}6\k7u\.h]v;~_בCwP.CM#3e+?y/\Ln2݄Of xAMۺ8= /􉰓H"|AR5&?nxhxe2Su}0l `Y+tkdGInZƜ$yOF&"zҲt!{t1Ҟw+AH\Պr5܊hV"yQ}^Y]%]E^ĉ`1DCkg?IjX}ϫ{y:QAB QZKsv!3UzayFP2:9OoKPU8/8|]Wvӡܿ19c&&(H|CnLB|D"d,R8V =dB~SengMnEE*F~FjotD56v }[V hU[3{Tc&Gfr+VWgB5kv9O8>0J ZB'9E_#ܗ5vO:m*qD:ڂ"<# x|ۦLqnZZ&sZrgdE Kȍ0e3;SDI\ Сܓ2AKvPC_$AB6ؗY_|T]/cj<-83glk7iGM9"vGt ߮^zx _M8" qvY_H?Ff5Gy53*&&׏!RW=[Hf6Lͼ&D?ѡ6|{?Z/v; %`H6uwl/"wLյ&R|js\.6i)갤;a7Ժ F>i:mHP.CLD^O???1v9?6&b/NMEf5+<2S1g?)?}u?6[+OK ˶Ȫf6y"@Z>\/Dq\ ,ТI\$wU%xȕLlAO_y79 `!ҘIfൿO~dRodʥuwOkVo0Đ Xgl!3UW}L25)ry/LܙZ3Xy,O_Pw7[ Dz sA#3UW'cE!9۰I\o z["|g_G I)EsZ|Q/Y,`Z䔽%w\38/O1d@zP ;@V&7W#ۓ~?)m2v:N,yUD`!!1Yk;Bbh]Iӻ^??? Q.G]:vݔss Yx3^ǢpTikϨt{r{kWd:y? Wpv]],$YT6unx'4M1zЋt޼nP!lt"XCˁDK76omYEl׼V٧O<0dd,(u^ Ƿ\l*}9mYrUꑴ7J~;B:> l0yfT򅄴`l+HNɕk~iHG<m{;}'!t\d޾>%" ۻl6e'#a?/ Ww^rD9Ay;.Nq\7 i -NQcqnrMѿ8a|>5ɿ4d^HD>DIzwC#ת^ҿ@淉g' ?N5[m.$ ^ֻhq%# lB2QSdP U<4| 5O_qn !.;_XB o9Y;N X6WܒumJ/ C632?N4 TF…3E@4xtmJ C6`|<JQ/Bt:i4h4Byai.)_' Ri:H[r_4{%qH$&4 re< / kX8a']T}s_rT!\$Ry۔!P/i%s,$v0GPȊxp8H0?0d |}}]"6d!q<[vq)@|\x#l9Kjݎ? iЕI+p:~Х|MTK4O<|fJ>8nM_#t mg.5O=l6$i;3UsKބ g_Uf{vl:NO*a.:Cdfj&M)LMTIj,3U7pXfjeZh)35 f>}\w[3o0|>/@ mdN&޴lqHLy7{^+zfjf*_gi)NhBrvmZ[=b -nifMK;hK0 pZ.3VZ닭R֜Q(3u:4naZsBdiҴKvfjfɧ{Jd✙Zs|<3Sejv[frv;?p8O ԟif? 
|2S___zzASP6tҟ@j֙inw>ifEZ[}LGBvseN&ezl68O?3SOR___vйi6 *fRV5V+30KKq9L].vI;gL].nI9Lݦ?8/LݦobkZP0 ~g*ap9޲635 2S03SG L) 3/.o-IENDB`megadepth-1.2.0/megadepthd000077500000000000000000000002241420302544700154770ustar00rootroot00000000000000#!/usr/bin/env bash #get this script's path p=$(dirname $0) export LD_LIBRARY_PATH=$p/libBigWig:$p/htslib:$LD_LIBRARY_PATH $p/megadepth_debug "$@" megadepth-1.2.0/release.sh000077500000000000000000000004011420302544700154230ustar00rootroot00000000000000#!/usr/bin/env bash #build "statlib" version (or main linux version) of megadepth ./run_hbb_build_container.sh ./run_macos_build_container.sh ./run_windows_build_container.sh #build latest Docker runner image: /bin/bash -x create_docker_to_run_megadepth.sh megadepth-1.2.0/robin_hood.h000066400000000000000000002426321420302544700157550ustar00rootroot00000000000000// ______ _____ ______ _________ // ______________ ___ /_ ___(_)_______ ___ /_ ______ ______ ______ / // __ ___/_ __ \__ __ \__ / __ __ \ __ __ \_ __ \_ __ \_ __ / // _ / / /_/ /_ /_/ /_ / _ / / / _ / / // /_/ // /_/ // /_/ / // /_/ \____/ /_.___/ /_/ /_/ /_/ ________/_/ /_/ \____/ \____/ \__,_/ // _/_____/ // // Fast & memory efficient hashtable based on robin hood hashing for C++11/14/17/20 // version 3.7.0 // https://github.com/martinus/robin-hood-hashing // // Licensed under the MIT License . 
// SPDX-License-Identifier: MIT // Copyright (c) 2018-2020 Martin Ankerl // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
#ifndef ROBIN_HOOD_H_INCLUDED #define ROBIN_HOOD_H_INCLUDED // see https://semver.org/ #define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes #define ROBIN_HOOD_VERSION_MINOR 7 // for adding functionality in a backwards-compatible manner #define ROBIN_HOOD_VERSION_PATCH 0 // for backwards-compatible bug fixes #include #include #include #include #include #include #include #include // #define ROBIN_HOOD_LOG_ENABLED #ifdef ROBIN_HOOD_LOG_ENABLED # include # define ROBIN_HOOD_LOG(x) std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << x << std::endl #else # define ROBIN_HOOD_LOG(x) #endif // #define ROBIN_HOOD_TRACE_ENABLED #ifdef ROBIN_HOOD_TRACE_ENABLED # include # define ROBIN_HOOD_TRACE(x) \ std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << x << std::endl #else # define ROBIN_HOOD_TRACE(x) #endif // #define ROBIN_HOOD_COUNT_ENABLED #ifdef ROBIN_HOOD_COUNT_ENABLED # include # define ROBIN_HOOD_COUNT(x) ++counts().x; namespace robin_hood { struct Counts { uint64_t shiftUp{}; uint64_t shiftDown{}; }; inline std::ostream& operator<<(std::ostream& os, Counts const& c) { return os << c.shiftUp << " shiftUp" << std::endl << c.shiftDown << " shiftDown" << std::endl; } static Counts& counts() { static Counts counts{}; return counts; } } // namespace robin_hood #else # define ROBIN_HOOD_COUNT(x) #endif // all non-argument macros should use this facility. 
See // https://www.fluentcpp.com/2019/05/28/better-macros-better-flags/ #define ROBIN_HOOD(x) ROBIN_HOOD_PRIVATE_DEFINITION_##x() // mark unused members with this macro #define ROBIN_HOOD_UNUSED(identifier) // bitness #if SIZE_MAX == UINT32_MAX # define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 32 #elif SIZE_MAX == UINT64_MAX # define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 64 #else # error Unsupported bitness #endif // endianess #ifdef _MSC_VER # define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() 1 # define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() 0 #else # define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() \ (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) # define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) #endif // inline #ifdef _MSC_VER # define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __declspec(noinline) #else # define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __attribute__((noinline)) #endif // exceptions #if !defined(__cpp_exceptions) && !defined(__EXCEPTIONS) && !defined(_CPPUNWIND) # define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 0 #else # define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 1 #endif // count leading/trailing bits #if ((defined __i386 || defined __x86_64__) && defined __BMI__) || defined _M_IX86 || defined _M_X64 # ifdef _MSC_VER # include # else # include # endif # if ROBIN_HOOD(BITNESS) == 32 # define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() _tzcnt_u32 # else # define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() _tzcnt_u64 # endif # if defined __AVX2__ || defined __BMI__ # define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ROBIN_HOOD(CTZ)(x) # else # define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ROBIN_HOOD(CTZ)(x) # endif #elif defined _MSC_VER # if ROBIN_HOOD(BITNESS) == 32 # define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward # else # define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64 # endif # include # pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD)) # define 
ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \ [](size_t mask) noexcept -> int { \ unsigned long index; \ return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast(index) \ : ROBIN_HOOD(BITNESS); \ }(x) #else # if ROBIN_HOOD(BITNESS) == 32 # define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl # define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl # else # define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll # define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll # endif # define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS)) # define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS)) #endif // fallthrough #ifndef __has_cpp_attribute // For backwards compatibility # define __has_cpp_attribute(x) 0 #endif #if __has_cpp_attribute(clang::fallthrough) # define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[clang::fallthrough]] #elif __has_cpp_attribute(gnu::fallthrough) # define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[gnu::fallthrough]] #else # define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() #endif // likely/unlikely #ifdef _MSC_VER # define ROBIN_HOOD_LIKELY(condition) condition # define ROBIN_HOOD_UNLIKELY(condition) condition #else # define ROBIN_HOOD_LIKELY(condition) __builtin_expect(condition, 1) # define ROBIN_HOOD_UNLIKELY(condition) __builtin_expect(condition, 0) #endif // workaround missing "is_trivially_copyable" in g++ < 5.0 // See https://stackoverflow.com/a/31798726/48181 #if defined(__GNUC__) && __GNUC__ < 5 # define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__) #else # define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) 
std::is_trivially_copyable<__VA_ARGS__>::value #endif // helpers for C++ versions, see https://gcc.gnu.org/onlinedocs/cpp/Standard-Predefined-Macros.html #define ROBIN_HOOD_PRIVATE_DEFINITION_CXX() __cplusplus #define ROBIN_HOOD_PRIVATE_DEFINITION_CXX98() 199711L #define ROBIN_HOOD_PRIVATE_DEFINITION_CXX11() 201103L #define ROBIN_HOOD_PRIVATE_DEFINITION_CXX14() 201402L #define ROBIN_HOOD_PRIVATE_DEFINITION_CXX17() 201703L #if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) # define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() [[nodiscard]] #else # define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() #endif namespace robin_hood { #if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) # define ROBIN_HOOD_STD std #else // c++11 compatibility layer namespace ROBIN_HOOD_STD { template struct alignment_of : std::integral_constant::type)> {}; template class integer_sequence { public: using value_type = T; static_assert(std::is_integral::value, "not integral type"); static constexpr std::size_t size() noexcept { return sizeof...(Ints); } }; template using index_sequence = integer_sequence; namespace detail_ { template struct IntSeqImpl { using TValue = T; static_assert(std::is_integral::value, "not integral type"); static_assert(Begin >= 0 && Begin < End, "unexpected argument (Begin<0 || Begin<=End)"); template struct IntSeqCombiner; template struct IntSeqCombiner, integer_sequence> { using TResult = integer_sequence; }; using TResult = typename IntSeqCombiner::TResult, typename IntSeqImpl::TResult>::TResult; }; template struct IntSeqImpl { using TValue = T; static_assert(std::is_integral::value, "not integral type"); static_assert(Begin >= 0, "unexpected argument (Begin<0)"); using TResult = integer_sequence; }; template struct IntSeqImpl { using TValue = T; static_assert(std::is_integral::value, "not integral type"); static_assert(Begin >= 0, "unexpected argument (Begin<0)"); using TResult = integer_sequence; }; } // namespace detail_ template using make_integer_sequence = typename 
detail_::IntSeqImpl::TResult; template using make_index_sequence = make_integer_sequence; template using index_sequence_for = make_index_sequence; } // namespace ROBIN_HOOD_STD #endif namespace detail { template T rotr(T x, unsigned k) { return (x >> k) | (x << (8U * sizeof(T) - k)); } // This cast gets rid of warnings like "cast from 'uint8_t*' {aka 'unsigned char*'} to // 'uint64_t*' {aka 'long unsigned int*'} increases required alignment of target type". Use with // care! template inline T reinterpret_cast_no_cast_align_warning(void* ptr) noexcept { return reinterpret_cast(ptr); } template inline T reinterpret_cast_no_cast_align_warning(void const* ptr) noexcept { return reinterpret_cast(ptr); } // make sure this is not inlined as it is slow and dramatically enlarges code, thus making other // inlinings more difficult. Throws are also generally the slow path. template ROBIN_HOOD(NOINLINE) #if ROBIN_HOOD(HAS_EXCEPTIONS) void doThrow(Args&&... args) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay) throw E(std::forward(args)...); } #else void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) { abort(); } #endif template T* assertNotNull(T* t, Args&&... args) { if (ROBIN_HOOD_UNLIKELY(nullptr == t)) { doThrow(std::forward(args)...); } return t; } template inline T unaligned_load(void const* ptr) noexcept { // using memcpy so we don't get into unaligned load problems. // compiler should optimize this very well anyways. T t; std::memcpy(&t, ptr, sizeof(T)); return t; } // Allocates bulks of memory for objects of type T. This deallocates the memory in the destructor, // and keeps a linked list of the allocated memory around. Overhead per allocation is the size of a // pointer. template class BulkPoolAllocator { public: BulkPoolAllocator() noexcept = default; // does not copy anything, just creates a new allocator. 
BulkPoolAllocator(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept : mHead(nullptr) , mListForFree(nullptr) {} BulkPoolAllocator(BulkPoolAllocator&& o) noexcept : mHead(o.mHead) , mListForFree(o.mListForFree) { o.mListForFree = nullptr; o.mHead = nullptr; } BulkPoolAllocator& operator=(BulkPoolAllocator&& o) noexcept { reset(); mHead = o.mHead; mListForFree = o.mListForFree; o.mListForFree = nullptr; o.mHead = nullptr; return *this; } BulkPoolAllocator& // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) operator=(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept { // does not do anything return *this; } ~BulkPoolAllocator() noexcept { reset(); } // Deallocates all allocated memory. void reset() noexcept { while (mListForFree) { T* tmp = *mListForFree; free(mListForFree); mListForFree = reinterpret_cast_no_cast_align_warning(tmp); } mHead = nullptr; } // allocates, but does NOT initialize. Use in-place new constructor, e.g. // T* obj = pool.allocate(); // ::new (static_cast(obj)) T(); T* allocate() { T* tmp = mHead; if (!tmp) { tmp = performAllocation(); } mHead = *reinterpret_cast_no_cast_align_warning(tmp); return tmp; } // does not actually deallocate but puts it in store. // make sure you have already called the destructor! e.g. with // obj->~T(); // pool.deallocate(obj); void deallocate(T* obj) noexcept { *reinterpret_cast_no_cast_align_warning(obj) = mHead; mHead = obj; } // Adds an already allocated block of memory to the allocator. This allocator is from now on // responsible for freeing the data (with free()). If the provided data is not large enough to // make use of, it is immediately freed. Otherwise it is reused and freed in the destructor. void addOrFree(void* ptr, const size_t numBytes) noexcept { // calculate number of available elements in ptr if (numBytes < ALIGNMENT + ALIGNED_SIZE) { // not enough data for at least one element. Free and return. 
free(ptr); } else { add(ptr, numBytes); } } void swap(BulkPoolAllocator& other) noexcept { using std::swap; swap(mHead, other.mHead); swap(mListForFree, other.mListForFree); } private: // iterates the list of allocated memory to calculate how many to alloc next. // Recalculating this each time saves us a size_t member. // This ignores the fact that memory blocks might have been added manually with addOrFree. In // practice, this should not matter much. ROBIN_HOOD(NODISCARD) size_t calcNumElementsToAlloc() const noexcept { auto tmp = mListForFree; size_t numAllocs = MinNumAllocs; while (numAllocs * 2 <= MaxNumAllocs && tmp) { auto x = reinterpret_cast(tmp); tmp = *x; numAllocs *= 2; } return numAllocs; } // WARNING: Underflow if numBytes < ALIGNMENT! This is guarded in addOrFree(). void add(void* ptr, const size_t numBytes) noexcept { const size_t numElements = (numBytes - ALIGNMENT) / ALIGNED_SIZE; auto data = reinterpret_cast(ptr); // link free list auto x = reinterpret_cast(data); *x = mListForFree; mListForFree = data; // create linked list for newly allocated data auto* const headT = reinterpret_cast_no_cast_align_warning(reinterpret_cast(ptr) + ALIGNMENT); auto* const head = reinterpret_cast(headT); // Visual Studio compiler automatically unrolls this loop, which is pretty cool for (size_t i = 0; i < numElements; ++i) { *reinterpret_cast_no_cast_align_warning(head + i * ALIGNED_SIZE) = head + (i + 1) * ALIGNED_SIZE; } // last one points to 0 *reinterpret_cast_no_cast_align_warning(head + (numElements - 1) * ALIGNED_SIZE) = mHead; mHead = headT; } // Called when no memory is available (mHead == 0). // Don't inline this slow path. ROBIN_HOOD(NOINLINE) T* performAllocation() { size_t const numElementsToAlloc = calcNumElementsToAlloc(); // alloc new memory: [prev |T, T, ... 
T] // std::cout << (sizeof(T*) + ALIGNED_SIZE * numElementsToAlloc) << " bytes" << std::endl; size_t const bytes = ALIGNMENT + ALIGNED_SIZE * numElementsToAlloc; add(assertNotNull(malloc(bytes)), bytes); return mHead; } // enforce byte alignment of the T's #if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14) static constexpr size_t ALIGNMENT = (std::max)(std::alignment_of::value, std::alignment_of::value); #else static const size_t ALIGNMENT = (ROBIN_HOOD_STD::alignment_of::value > ROBIN_HOOD_STD::alignment_of::value) ? ROBIN_HOOD_STD::alignment_of::value : +ROBIN_HOOD_STD::alignment_of::value; // the + is for walkarround #endif static constexpr size_t ALIGNED_SIZE = ((sizeof(T) - 1) / ALIGNMENT + 1) * ALIGNMENT; static_assert(MinNumAllocs >= 1, "MinNumAllocs"); static_assert(MaxNumAllocs >= MinNumAllocs, "MaxNumAllocs"); static_assert(ALIGNED_SIZE >= sizeof(T*), "ALIGNED_SIZE"); static_assert(0 == (ALIGNED_SIZE % sizeof(T*)), "ALIGNED_SIZE mod"); static_assert(ALIGNMENT >= sizeof(T*), "ALIGNMENT"); T* mHead{nullptr}; T** mListForFree{nullptr}; }; template struct NodeAllocator; // dummy allocator that does nothing template struct NodeAllocator { // we are not using the data, so just free it. void addOrFree(void* ptr, size_t ROBIN_HOOD_UNUSED(numBytes) /*unused*/) noexcept { free(ptr); } }; template struct NodeAllocator : public BulkPoolAllocator {}; // dummy hash, unsed as mixer when robin_hood::hash is already used template struct identity_hash { constexpr size_t operator()(T const& obj) const noexcept { return static_cast(obj); } }; // c++14 doesn't have is_nothrow_swappable, and clang++ 6.0.1 doesn't like it either, so I'm making // my own here. 
namespace swappable { #if ROBIN_HOOD(CXX) < ROBIN_HOOD(CXX17) using std::swap; template struct nothrow { static const bool value = noexcept(swap(std::declval(), std::declval())); }; #else template struct nothrow { static const bool value = std::is_nothrow_swappable::value; }; #endif } // namespace swappable } // namespace detail struct is_transparent_tag {}; // A custom pair implementation is used in the map because std::pair is not is_trivially_copyable, // which means it would not be allowed to be used in std::memcpy. This struct is copyable, which is // also tested. template struct pair { using first_type = T1; using second_type = T2; template ::value && std::is_default_constructible::value>::type> constexpr pair() noexcept(noexcept(U1()) && noexcept(U2())) : first() , second() {} // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. explicit constexpr pair(std::pair const& o) noexcept( noexcept(T1(std::declval())) && noexcept(T2(std::declval()))) : first(o.first) , second(o.second) {} // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. 
explicit constexpr pair(std::pair&& o) noexcept(noexcept( T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) : first(std::move(o.first)) , second(std::move(o.second)) {} constexpr pair(T1&& a, T2&& b) noexcept(noexcept( T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) : first(std::move(a)) , second(std::move(b)) {} template constexpr pair(U1&& a, U2&& b) noexcept(noexcept(T1(std::forward( std::declval()))) && noexcept(T2(std::forward(std::declval())))) : first(std::forward(a)) , second(std::forward(b)) {} template constexpr pair( std::piecewise_construct_t /*unused*/, std::tuple a, std::tuple b) noexcept(noexcept(pair(std::declval&>(), std::declval&>(), ROBIN_HOOD_STD::index_sequence_for(), ROBIN_HOOD_STD::index_sequence_for()))) : pair(a, b, ROBIN_HOOD_STD::index_sequence_for(), ROBIN_HOOD_STD::index_sequence_for()) {} // constructor called from the std::piecewise_construct_t ctor template pair(std::tuple& a, std::tuple& b, ROBIN_HOOD_STD::index_sequence /*unused*/, ROBIN_HOOD_STD::index_sequence /*unused*/) noexcept( noexcept(T1(std::forward(std::get( std::declval&>()))...)) && noexcept(T2(std:: forward(std::get( std::declval&>()))...))) : first(std::forward(std::get(a))...) , second(std::forward(std::get(b))...) { // make visual studio compiler happy about warning about unused a & b. // Visual studio's pair implementation disables warning 4100. 
(void)a; (void)b; } void swap(pair& o) noexcept((detail::swappable::nothrow::value) && (detail::swappable::nothrow::value)) { using std::swap; swap(first, o.first); swap(second, o.second); } T1 first; // NOLINT(misc-non-private-member-variables-in-classes) T2 second; // NOLINT(misc-non-private-member-variables-in-classes) }; template inline void swap(pair& a, pair& b) noexcept( noexcept(std::declval&>().swap(std::declval&>()))) { a.swap(b); } template inline constexpr bool operator==(pair const& x, pair const& y) { return (x.first == y.first) && (x.second == y.second); } template inline constexpr bool operator!=(pair const& x, pair const& y) { return !(x == y); } template inline constexpr bool operator<(pair const& x, pair const& y) noexcept(noexcept( std::declval() < std::declval()) && noexcept(std::declval() < std::declval())) { return x.first < y.first || (!(y.first < x.first) && x.second < y.second); } template inline constexpr bool operator>(pair const& x, pair const& y) { return y < x; } template inline constexpr bool operator<=(pair const& x, pair const& y) { return !(x > y); } template inline constexpr bool operator>=(pair const& x, pair const& y) { return !(x < y); } static size_t hash_bytes(void const* ptr, size_t const len) noexcept { static constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995); static constexpr uint64_t seed = UINT64_C(0xe17a1465); static constexpr unsigned int r = 47; auto const* const data64 = static_cast(ptr); uint64_t h = seed ^ (len * m); size_t const n_blocks = len / 8; for (size_t i = 0; i < n_blocks; ++i) { auto k = detail::unaligned_load(data64 + i); k *= m; k ^= k >> r; k *= m; h ^= k; h *= m; } auto const* const data8 = reinterpret_cast(data64 + n_blocks); switch (len & 7U) { case 7: h ^= static_cast(data8[6]) << 48U; ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH case 6: h ^= static_cast(data8[5]) << 40U; ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH case 5: h ^= static_cast(data8[4]) << 32U; ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH case 4: 
h ^= static_cast(data8[3]) << 24U; ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH case 3: h ^= static_cast(data8[2]) << 16U; ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH case 2: h ^= static_cast(data8[1]) << 8U; ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH case 1: h ^= static_cast(data8[0]); h *= m; ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH default: break; } h ^= h >> r; h *= m; h ^= h >> r; return static_cast(h); } inline size_t hash_int(uint64_t x) noexcept { // inspired by lemire's strongly universal hashing // https://lemire.me/blog/2018/08/15/fast-strongly-universal-64-bit-hashing-everywhere/ // // Instead of shifts, we use rotations so we don't lose any bits. // // Added a final multiplcation with a constant for more mixing. It is most important that the // lower bits are well mixed. auto h1 = x * UINT64_C(0xA24BAED4963EE407); auto h2 = detail::rotr(x, 32U) * UINT64_C(0x9FB21C651E98DF25); auto h = detail::rotr(h1 + h2, 32U); return static_cast(h); } // A thin wrapper around std::hash, performing an additional simple mixing step of the result. 
template struct hash : public std::hash { size_t operator()(T const& obj) const noexcept(noexcept(std::declval>().operator()(std::declval()))) { // call base hash auto result = std::hash::operator()(obj); // return mixed of that, to be save against identity has return hash_int(static_cast(result)); } }; template <> struct hash { size_t operator()(std::string const& str) const noexcept { return hash_bytes(str.data(), str.size()); } }; template struct hash { size_t operator()(T* ptr) const noexcept { return hash_int(reinterpret_cast(ptr)); } }; #define ROBIN_HOOD_HASH_INT(T) \ template <> \ struct hash { \ size_t operator()(T obj) const noexcept { \ return hash_int(static_cast(obj)); \ } \ } #if defined(__GNUC__) && !defined(__clang__) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wuseless-cast" #endif // see https://en.cppreference.com/w/cpp/utility/hash ROBIN_HOOD_HASH_INT(bool); ROBIN_HOOD_HASH_INT(char); ROBIN_HOOD_HASH_INT(signed char); ROBIN_HOOD_HASH_INT(unsigned char); ROBIN_HOOD_HASH_INT(char16_t); ROBIN_HOOD_HASH_INT(char32_t); ROBIN_HOOD_HASH_INT(wchar_t); ROBIN_HOOD_HASH_INT(short); ROBIN_HOOD_HASH_INT(unsigned short); ROBIN_HOOD_HASH_INT(int); ROBIN_HOOD_HASH_INT(unsigned int); ROBIN_HOOD_HASH_INT(long); ROBIN_HOOD_HASH_INT(long long); ROBIN_HOOD_HASH_INT(unsigned long); ROBIN_HOOD_HASH_INT(unsigned long long); #if defined(__GNUC__) && !defined(__clang__) # pragma GCC diagnostic pop #endif namespace detail { template struct has_is_transparent : public std::false_type {}; template struct has_is_transparent : public std::true_type {}; // using wrapper classes for hash and key_equal prevents the diamond problem when the same type // is used. 
see https://stackoverflow.com/a/28771920/48181 template struct WrapHash : public T { WrapHash() = default; explicit WrapHash(T const& o) noexcept(noexcept(T(std::declval()))) : T(o) {} }; template struct WrapKeyEqual : public T { WrapKeyEqual() = default; explicit WrapKeyEqual(T const& o) noexcept(noexcept(T(std::declval()))) : T(o) {} }; // A highly optimized hashmap implementation, using the Robin Hood algorithm. // // In most cases, this map should be usable as a drop-in replacement for std::unordered_map, but // be about 2x faster in most cases and require much less allocations. // // This implementation uses the following memory layout: // // [Node, Node, ... Node | info, info, ... infoSentinel ] // // * Node: either a DataNode that directly has the std::pair as member, // or a DataNode with a pointer to std::pair. Which DataNode representation to use // depends on how fast the swap() operation is. Heuristically, this is automatically choosen // based on sizeof(). there are always 2^n Nodes. // // * info: Each Node in the map has a corresponding info byte, so there are 2^n info bytes. // Each byte is initialized to 0, meaning the corresponding Node is empty. Set to 1 means the // corresponding node contains data. Set to 2 means the corresponding Node is filled, but it // actually belongs to the previous position and was pushed out because that place is already // taken. // // * infoSentinel: Sentinel byte set to 1, so that iterator's ++ can stop at end() without the // need for a idx variable. // // According to STL, order of templates has effect on throughput. That's why I've moved the // boolean to the front. 
// https://www.reddit.com/r/cpp/comments/ahp6iu/compile_time_binary_size_reductions_and_cs_future/eeguck4/ template class Table : public WrapHash, public WrapKeyEqual, detail::NodeAllocator< typename std::conditional< std::is_void::value, Key, robin_hood::pair::type, T>>::type, 4, 16384, IsFlat> { public: static constexpr bool is_flat = IsFlat; static constexpr bool is_map = !std::is_void::value; static constexpr bool is_set = !is_map; static constexpr bool is_transparent = has_is_transparent::value && has_is_transparent::value; using key_type = Key; using mapped_type = T; using value_type = typename std::conditional< is_set, Key, robin_hood::pair::type, T>>::type; using size_type = size_t; using hasher = Hash; using key_equal = KeyEqual; using Self = Table; private: static_assert(MaxLoadFactor100 > 10 && MaxLoadFactor100 < 100, "MaxLoadFactor100 needs to be >10 && < 100"); using WHash = WrapHash; using WKeyEqual = WrapKeyEqual; // configuration defaults // make sure we have 8 elements, needed to quickly rehash mInfo static constexpr size_t InitialNumElements = sizeof(uint64_t); static constexpr uint32_t InitialInfoNumBits = 5; static constexpr uint8_t InitialInfoInc = 1U << InitialInfoNumBits; static constexpr uint8_t InitialInfoHashShift = sizeof(size_t) * 8 - InitialInfoNumBits; using DataPool = detail::NodeAllocator; // type needs to be wider than uint8_t. using InfoType = uint32_t; // DataNode //////////////////////////////////////////////////////// // Primary template for the data node. We have special implementations for small and big // objects. For large objects it is assumed that swap() is fairly slow, so we allocate these // on the heap so swap merely swaps a pointer. template class DataNode {}; // Small: just allocate on the stack. template class DataNode final { public: template explicit DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, Args&&... args) noexcept( noexcept(value_type(std::forward(args)...))) : mData(std::forward(args)...) 
{} DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept( std::is_nothrow_move_constructible::value) : mData(std::move(n.mData)) {} // doesn't do anything void destroy(M& ROBIN_HOOD_UNUSED(map) /*unused*/) noexcept {} void destroyDoNotDeallocate() noexcept {} value_type const* operator->() const noexcept { return &mData; } value_type* operator->() noexcept { return &mData; } const value_type& operator*() const noexcept { return mData; } value_type& operator*() noexcept { return mData; } template ROBIN_HOOD(NODISCARD) typename std::enable_if::type getFirst() noexcept { return mData.first; } template ROBIN_HOOD(NODISCARD) typename std::enable_if::type getFirst() noexcept { return mData; } template ROBIN_HOOD(NODISCARD) typename std::enable_if::type getFirst() const noexcept { return mData.first; } template ROBIN_HOOD(NODISCARD) typename std::enable_if::type getFirst() const noexcept { return mData; } template ROBIN_HOOD(NODISCARD) typename std::enable_if::type getSecond() noexcept { return mData.second; } template ROBIN_HOOD(NODISCARD) typename std::enable_if::type getSecond() const noexcept { return mData.second; } void swap(DataNode& o) noexcept( noexcept(std::declval().swap(std::declval()))) { mData.swap(o.mData); } private: value_type mData; }; // big object: allocate on heap. template class DataNode { public: template explicit DataNode(M& map, Args&&... args) : mData(map.allocate()) { ::new (static_cast(mData)) value_type(std::forward(args)...); } DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept : mData(std::move(n.mData)) {} void destroy(M& map) noexcept { // don't deallocate, just put it into list of datapool. 
mData->~value_type(); map.deallocate(mData); } void destroyDoNotDeallocate() noexcept { mData->~value_type(); } value_type const* operator->() const noexcept { return mData; } value_type* operator->() noexcept { return mData; } const value_type& operator*() const { return *mData; } value_type& operator*() { return *mData; } template ROBIN_HOOD(NODISCARD) typename std::enable_if::type getFirst() noexcept { return mData->first; } template ROBIN_HOOD(NODISCARD) typename std::enable_if::type getFirst() noexcept { return *mData; } template ROBIN_HOOD(NODISCARD) typename std::enable_if::type getFirst() const noexcept { return mData->first; } template ROBIN_HOOD(NODISCARD) typename std::enable_if::type getFirst() const noexcept { return *mData; } template ROBIN_HOOD(NODISCARD) typename std::enable_if::type getSecond() noexcept { return mData->second; } template ROBIN_HOOD(NODISCARD) typename std::enable_if::type getSecond() const noexcept { return mData->second; } void swap(DataNode& o) noexcept { using std::swap; swap(mData, o.mData); } private: value_type* mData; }; using Node = DataNode; // helpers for doInsert: extract first entry (only const required) ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(Node const& n) const noexcept { return n.getFirst(); } // in case we have void mapped_type, we are not using a pair, thus we just route k through. // No need to disable this because it's just not used if not applicable. ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(key_type const& k) const noexcept { return k; } // in case we have non-void mapped_type, we have a standard robin_hood::pair template ROBIN_HOOD(NODISCARD) typename std::enable_if::value, key_type const&>::type getFirstConst(value_type const& vt) const noexcept { return vt.first; } // Cloner ////////////////////////////////////////////////////////// template struct Cloner; // fast path: Just copy data, without allocating anything. 
template struct Cloner { void operator()(M const& source, M& target) const { auto const* const src = reinterpret_cast(source.mKeyVals); auto* tgt = reinterpret_cast(target.mKeyVals); auto const numElementsWithBuffer = target.calcNumElementsWithBuffer(target.mMask + 1); std::copy(src, src + target.calcNumBytesTotal(numElementsWithBuffer), tgt); } }; template struct Cloner { void operator()(M const& s, M& t) const { auto const numElementsWithBuffer = t.calcNumElementsWithBuffer(t.mMask + 1); std::copy(s.mInfo, s.mInfo + t.calcNumBytesInfo(numElementsWithBuffer), t.mInfo); for (size_t i = 0; i < numElementsWithBuffer; ++i) { if (t.mInfo[i]) { ::new (static_cast(t.mKeyVals + i)) Node(t, *s.mKeyVals[i]); } } } }; // Destroyer /////////////////////////////////////////////////////// template struct Destroyer {}; template struct Destroyer { void nodes(M& m) const noexcept { m.mNumElements = 0; } void nodesDoNotDeallocate(M& m) const noexcept { m.mNumElements = 0; } }; template struct Destroyer { void nodes(M& m) const noexcept { m.mNumElements = 0; // clear also resets mInfo to 0, that's sometimes not necessary. auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { if (0 != m.mInfo[idx]) { Node& n = m.mKeyVals[idx]; n.destroy(m); n.~Node(); } } } void nodesDoNotDeallocate(M& m) const noexcept { m.mNumElements = 0; // clear also resets mInfo to 0, that's sometimes not necessary. auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { if (0 != m.mInfo[idx]) { Node& n = m.mKeyVals[idx]; n.destroyDoNotDeallocate(); n.~Node(); } } } }; // Iter //////////////////////////////////////////////////////////// struct fast_forward_tag {}; // generic iterator for both const_iterator and iterator. 
template // NOLINTNEXTLINE(hicpp-special-member-functions,cppcoreguidelines-special-member-functions) class Iter { private: using NodePtr = typename std::conditional::type; public: using difference_type = std::ptrdiff_t; using value_type = typename Self::value_type; using reference = typename std::conditional::type; using pointer = typename std::conditional::type; using iterator_category = std::forward_iterator_tag; // default constructed iterator can be compared to itself, but WON'T return true when // compared to end(). Iter() = default; // Rule of zero: nothing specified. The conversion constructor is only enabled for // iterator to const_iterator, so it doesn't accidentally work as a copy ctor. // Conversion constructor from iterator to const_iterator. template ::type> // NOLINTNEXTLINE(hicpp-explicit-conversions) Iter(Iter const& other) noexcept : mKeyVals(other.mKeyVals) , mInfo(other.mInfo) {} Iter(NodePtr valPtr, uint8_t const* infoPtr) noexcept : mKeyVals(valPtr) , mInfo(infoPtr) {} Iter(NodePtr valPtr, uint8_t const* infoPtr, fast_forward_tag ROBIN_HOOD_UNUSED(tag) /*unused*/) noexcept : mKeyVals(valPtr) , mInfo(infoPtr) { fastForward(); } template ::type> Iter& operator=(Iter const& other) noexcept { mKeyVals = other.mKeyVals; mInfo = other.mInfo; return *this; } // prefix increment. Undefined behavior if we are at end()! 
Iter& operator++() noexcept { mInfo++; mKeyVals++; fastForward(); return *this; } Iter operator++(int) noexcept { Iter tmp = *this; ++(*this); return std::move(tmp); } reference operator*() const { return **mKeyVals; } pointer operator->() const { return &**mKeyVals; } template bool operator==(Iter const& o) const noexcept { return mKeyVals == o.mKeyVals; } template bool operator!=(Iter const& o) const noexcept { return mKeyVals != o.mKeyVals; } private: // fast forward to the next non-free info byte void fastForward() noexcept { size_t n = 0; while (0U == (n = detail::unaligned_load(mInfo))) { mInfo += sizeof(size_t); mKeyVals += sizeof(size_t); } #if ROBIN_HOOD(LITTLE_ENDIAN) auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8; #else auto inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8; #endif mInfo += inc; mKeyVals += inc; } friend class Table; NodePtr mKeyVals{nullptr}; uint8_t const* mInfo{nullptr}; }; //////////////////////////////////////////////////////////////////// // highly performance relevant code. // Lower bits are used for indexing into the array (2^n size) // The upper 1-5 bits need to be a reasonable good hash, to save comparisons. template void keyToIdx(HashKey&& key, size_t* idx, InfoType* info) const { // for a user-specified hash that is *not* robin_hood::hash, apply robin_hood::hash as // an additional mixing step. This serves as a bad hash prevention, if the given data is // badly mixed. using Mix = typename std::conditional, hasher>::value, ::robin_hood::detail::identity_hash, ::robin_hood::hash>::type; *idx = Mix{}(WHash::operator()(key)); *info = mInfoInc + static_cast(*idx >> mInfoHashShift); *idx &= mMask; } // forwards the index by one, wrapping around at the end void next(InfoType* info, size_t* idx) const noexcept { *idx = *idx + 1; *info += mInfoInc; } void nextWhileLess(InfoType* info, size_t* idx) const noexcept { // unrolling this by hand did not bring any speedups. 
while (*info < mInfo[*idx]) { next(info, idx); } } // Shift everything up by one element. Tries to move stuff around. void shiftUp(size_t startIdx, size_t const insertion_idx) noexcept(std::is_nothrow_move_assignable::value) { auto idx = startIdx; ::new (static_cast(mKeyVals + idx)) Node(std::move(mKeyVals[idx - 1])); while (--idx != insertion_idx) { mKeyVals[idx] = std::move(mKeyVals[idx - 1]); } idx = startIdx; while (idx != insertion_idx) { ROBIN_HOOD_COUNT(shiftUp) mInfo[idx] = static_cast(mInfo[idx - 1] + mInfoInc); if (ROBIN_HOOD_UNLIKELY(mInfo[idx] + mInfoInc > 0xFF)) { mMaxNumElementsAllowed = 0; } --idx; } } void shiftDown(size_t idx) noexcept(std::is_nothrow_move_assignable::value) { // until we find one that is either empty or has zero offset. // TODO(martinus) we don't need to move everything, just the last one for the same // bucket. mKeyVals[idx].destroy(*this); // until we find one that is either empty or has zero offset. while (mInfo[idx + 1] >= 2 * mInfoInc) { ROBIN_HOOD_COUNT(shiftDown) mInfo[idx] = static_cast(mInfo[idx + 1] - mInfoInc); mKeyVals[idx] = std::move(mKeyVals[idx + 1]); ++idx; } mInfo[idx] = 0; // don't destroy, we've moved it // mKeyVals[idx].destroy(*this); mKeyVals[idx].~Node(); } // copy of find(), except that it returns iterator instead of const_iterator. template ROBIN_HOOD(NODISCARD) size_t findIdx(Other const& key) const { size_t idx{}; InfoType info{}; keyToIdx(key, &idx, &info); do { // unrolling this twice gives a bit of a speedup. More unrolling did not help. if (info == mInfo[idx] && ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { return idx; } next(&info, &idx); if (info == mInfo[idx] && ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { return idx; } next(&info, &idx); } while (info <= mInfo[idx]); // nothing found! return mMask == 0 ? 
0 : static_cast(std::distance( mKeyVals, reinterpret_cast_no_cast_align_warning(mInfo))); } void cloneData(const Table& o) { Cloner()(o, *this); } // inserts a keyval that is guaranteed to be new, e.g. when the hashmap is resized. // @return index where the element was created size_t insert_move(Node&& keyval) { // we don't retry, fail if overflowing // don't need to check max num elements if (0 == mMaxNumElementsAllowed && !try_increase_info()) { throwOverflowError(); // impossible to reach LCOV_EXCL_LINE } size_t idx{}; InfoType info{}; keyToIdx(keyval.getFirst(), &idx, &info); // skip forward. Use <= because we are certain that the element is not there. while (info <= mInfo[idx]) { idx = idx + 1; info += mInfoInc; } // key not found, so we are now exactly where we want to insert it. auto const insertion_idx = idx; auto const insertion_info = static_cast(info); if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { mMaxNumElementsAllowed = 0; } // find an empty spot while (0 != mInfo[idx]) { next(&info, &idx); } auto& l = mKeyVals[insertion_idx]; if (idx == insertion_idx) { ::new (static_cast(&l)) Node(std::move(keyval)); } else { shiftUp(idx, insertion_idx); l = std::move(keyval); } // put at empty spot mInfo[insertion_idx] = insertion_info; ++mNumElements; return insertion_idx; } public: using iterator = Iter; using const_iterator = Iter; // Creates an empty hash map. Nothing is allocated yet, this happens at the first insert. // This tremendously speeds up ctor & dtor of a map that never receives an element. The // penalty is payed at the first insert, and not before. Lookup of this empty map works // because everybody points to DummyInfoByte::b. parameter bucket_count is dictated by the // standard, but we can ignore it. 
explicit Table( size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{}) noexcept(noexcept(Hash(h)) && noexcept(KeyEqual(equal))) : WHash(h) , WKeyEqual(equal) { ROBIN_HOOD_TRACE(this) } template Table(Iter first, Iter last, size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{}) : WHash(h) , WKeyEqual(equal) { ROBIN_HOOD_TRACE(this) insert(first, last); } Table(std::initializer_list initlist, size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{}) : WHash(h) , WKeyEqual(equal) { ROBIN_HOOD_TRACE(this) insert(initlist.begin(), initlist.end()); } Table(Table&& o) noexcept : WHash(std::move(static_cast(o))) , WKeyEqual(std::move(static_cast(o))) , DataPool(std::move(static_cast(o))) { ROBIN_HOOD_TRACE(this) if (o.mMask) { mKeyVals = std::move(o.mKeyVals); mInfo = std::move(o.mInfo); mNumElements = std::move(o.mNumElements); mMask = std::move(o.mMask); mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); mInfoInc = std::move(o.mInfoInc); mInfoHashShift = std::move(o.mInfoHashShift); // set other's mask to 0 so its destructor won't do anything o.init(); } } Table& operator=(Table&& o) noexcept { ROBIN_HOOD_TRACE(this) if (&o != this) { if (o.mMask) { // only move stuff if the other map actually has some data destroy(); mKeyVals = std::move(o.mKeyVals); mInfo = std::move(o.mInfo); mNumElements = std::move(o.mNumElements); mMask = std::move(o.mMask); mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); mInfoInc = std::move(o.mInfoInc); mInfoHashShift = std::move(o.mInfoHashShift); WHash::operator=(std::move(static_cast(o))); WKeyEqual::operator=(std::move(static_cast(o))); DataPool::operator=(std::move(static_cast(o))); o.init(); } else { // nothing in the other map => just clear us. 
clear(); } } return *this; } Table(const Table& o) : WHash(static_cast(o)) , WKeyEqual(static_cast(o)) , DataPool(static_cast(o)) { ROBIN_HOOD_TRACE(this) if (!o.empty()) { // not empty: create an exact copy. it is also possible to just iterate through all // elements and insert them, but copying is probably faster. auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); mKeyVals = static_cast(detail::assertNotNull( malloc(calcNumBytesTotal(numElementsWithBuffer)))); // no need for calloc because clonData does memcpy mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); mNumElements = o.mNumElements; mMask = o.mMask; mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; mInfoInc = o.mInfoInc; mInfoHashShift = o.mInfoHashShift; cloneData(o); } } // Creates a copy of the given map. Copy constructor of each entry is used. // Not sure why clang-tidy thinks this doesn't handle self assignment, it does // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) Table& operator=(Table const& o) { ROBIN_HOOD_TRACE(this) if (&o == this) { // prevent assigning of itself return *this; } // we keep using the old allocator and not assign the new one, because we want to keep // the memory available. when it is the same size. if (o.empty()) { if (0 == mMask) { // nothing to do, we are empty too return *this; } // not empty: destroy what we have there // clear also resets mInfo to 0, that's sometimes not necessary. destroy(); init(); WHash::operator=(static_cast(o)); WKeyEqual::operator=(static_cast(o)); DataPool::operator=(static_cast(o)); return *this; } // clean up old stuff Destroyer::value>{}.nodes(*this); if (mMask != o.mMask) { // no luck: we don't have the same array size allocated, so we need to realloc. if (0 != mMask) { // only deallocate if we actually have data! 
free(mKeyVals); } auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); mKeyVals = static_cast(detail::assertNotNull( malloc(calcNumBytesTotal(numElementsWithBuffer)))); // no need for calloc here because cloneData performs a memcpy. mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); // sentinel is set in cloneData } WHash::operator=(static_cast(o)); WKeyEqual::operator=(static_cast(o)); DataPool::operator=(static_cast(o)); mNumElements = o.mNumElements; mMask = o.mMask; mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; mInfoInc = o.mInfoInc; mInfoHashShift = o.mInfoHashShift; cloneData(o); return *this; } // Swaps everything between the two maps. void swap(Table& o) { ROBIN_HOOD_TRACE(this) using std::swap; swap(o, *this); } // Clears all data, without resizing. void clear() { ROBIN_HOOD_TRACE(this) if (empty()) { // don't do anything! also important because we don't want to write to // DummyInfoByte::b, even though we would just write 0 to it. return; } Destroyer::value>{}.nodes(*this); auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); // clear everything, then set the sentinel again uint8_t const z = 0; std::fill(mInfo, mInfo + calcNumBytesInfo(numElementsWithBuffer), z); mInfo[numElementsWithBuffer] = 1; mInfoInc = InitialInfoInc; mInfoHashShift = InitialInfoHashShift; } // Destroys the map and all it's contents. ~Table() { ROBIN_HOOD_TRACE(this) destroy(); } // Checks if both tables contain the same entries. Order is irrelevant. 
bool operator==(const Table& other) const { ROBIN_HOOD_TRACE(this) if (other.size() != size()) { return false; } for (auto const& otherEntry : other) { if (!has(otherEntry)) { return false; } } return true; } bool operator!=(const Table& other) const { ROBIN_HOOD_TRACE(this) return !operator==(other); } template typename std::enable_if::value, Q&>::type operator[](const key_type& key) { ROBIN_HOOD_TRACE(this) return doCreateByKey(key); } template typename std::enable_if::value, Q&>::type operator[](key_type&& key) { ROBIN_HOOD_TRACE(this) return doCreateByKey(std::move(key)); } template void insert(Iter first, Iter last) { for (; first != last; ++first) { // value_type ctor needed because this might be called with std::pair's insert(value_type(*first)); } } template std::pair emplace(Args&&... args) { ROBIN_HOOD_TRACE(this) Node n{*this, std::forward(args)...}; auto r = doInsert(std::move(n)); if (!r.second) { // insertion not possible: destroy node // NOLINTNEXTLINE(bugprone-use-after-move) n.destroy(*this); } return r; } std::pair insert(const value_type& keyval) { ROBIN_HOOD_TRACE(this) return doInsert(keyval); } std::pair insert(value_type&& keyval) { return doInsert(std::move(keyval)); } // Returns 1 if key is found, 0 otherwise. 
size_t count(const key_type& key) const { // NOLINT(modernize-use-nodiscard) ROBIN_HOOD_TRACE(this) auto kv = mKeyVals + findIdx(key); if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { return 1; } return 0; } template // NOLINTNEXTLINE(modernize-use-nodiscard) typename std::enable_if::type count(const OtherKey& key) const { ROBIN_HOOD_TRACE(this) auto kv = mKeyVals + findIdx(key); if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { return 1; } return 0; } bool contains(const key_type& key) const { // NOLINT(modernize-use-nodiscard) return 1U == count(key); } template // NOLINTNEXTLINE(modernize-use-nodiscard) typename std::enable_if::type contains(const OtherKey& key) const { return 1U == count(key); } // Returns a reference to the value found for key. // Throws std::out_of_range if element cannot be found template // NOLINTNEXTLINE(modernize-use-nodiscard) typename std::enable_if::value, Q&>::type at(key_type const& key) { ROBIN_HOOD_TRACE(this) auto kv = mKeyVals + findIdx(key); if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { doThrow("key not found"); } return kv->getSecond(); } // Returns a reference to the value found for key. 
// Throws std::out_of_range if element cannot be found template // NOLINTNEXTLINE(modernize-use-nodiscard) typename std::enable_if::value, Q const&>::type at(key_type const& key) const { ROBIN_HOOD_TRACE(this) auto kv = mKeyVals + findIdx(key); if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { doThrow("key not found"); } return kv->getSecond(); } const_iterator find(const key_type& key) const { // NOLINT(modernize-use-nodiscard) ROBIN_HOOD_TRACE(this) const size_t idx = findIdx(key); return const_iterator{mKeyVals + idx, mInfo + idx}; } template const_iterator find(const OtherKey& key, is_transparent_tag /*unused*/) const { ROBIN_HOOD_TRACE(this) const size_t idx = findIdx(key); return const_iterator{mKeyVals + idx, mInfo + idx}; } template typename std::enable_if::type // NOLINT(modernize-use-nodiscard) find(const OtherKey& key) const { // NOLINT(modernize-use-nodiscard) ROBIN_HOOD_TRACE(this) const size_t idx = findIdx(key); return const_iterator{mKeyVals + idx, mInfo + idx}; } iterator find(const key_type& key) { ROBIN_HOOD_TRACE(this) const size_t idx = findIdx(key); return iterator{mKeyVals + idx, mInfo + idx}; } template iterator find(const OtherKey& key, is_transparent_tag /*unused*/) { ROBIN_HOOD_TRACE(this) const size_t idx = findIdx(key); return iterator{mKeyVals + idx, mInfo + idx}; } template typename std::enable_if::type find(const OtherKey& key) { ROBIN_HOOD_TRACE(this) const size_t idx = findIdx(key); return iterator{mKeyVals + idx, mInfo + idx}; } iterator begin() { ROBIN_HOOD_TRACE(this) if (empty()) { return end(); } return iterator(mKeyVals, mInfo, fast_forward_tag{}); } const_iterator begin() const { // NOLINT(modernize-use-nodiscard) ROBIN_HOOD_TRACE(this) return cbegin(); } const_iterator cbegin() const { // NOLINT(modernize-use-nodiscard) ROBIN_HOOD_TRACE(this) if (empty()) { return cend(); } return const_iterator(mKeyVals, mInfo, fast_forward_tag{}); } iterator end() { ROBIN_HOOD_TRACE(this) // no need to supply valid info pointer: 
end() must not be dereferenced, and only node // pointer is compared. return iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; } const_iterator end() const { // NOLINT(modernize-use-nodiscard) ROBIN_HOOD_TRACE(this) return cend(); } const_iterator cend() const { // NOLINT(modernize-use-nodiscard) ROBIN_HOOD_TRACE(this) return const_iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; } iterator erase(const_iterator pos) { ROBIN_HOOD_TRACE(this) // its safe to perform const cast here // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) return erase(iterator{const_cast(pos.mKeyVals), const_cast(pos.mInfo)}); } // Erases element at pos, returns iterator to the next element. iterator erase(iterator pos) { ROBIN_HOOD_TRACE(this) // we assume that pos always points to a valid entry, and not end(). auto const idx = static_cast(pos.mKeyVals - mKeyVals); shiftDown(idx); --mNumElements; if (*pos.mInfo) { // we've backward shifted, return this again return pos; } // no backward shift, return next element return ++pos; } size_t erase(const key_type& key) { ROBIN_HOOD_TRACE(this) size_t idx{}; InfoType info{}; keyToIdx(key, &idx, &info); // check while info matches with the source idx do { if (info == mInfo[idx] && WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { shiftDown(idx); --mNumElements; return 1; } next(&info, &idx); } while (info <= mInfo[idx]); // nothing found to delete return 0; } // reserves space for the specified number of elements. Makes sure the old data fits. // exactly the same as reserve(c). void rehash(size_t c) { reserve(c); } // reserves space for the specified number of elements. Makes sure the old data fits. // Exactly the same as resize(c). Use resize(0) to shrink to fit. 
void reserve(size_t c) { ROBIN_HOOD_TRACE(this) auto const minElementsAllowed = (std::max)(c, mNumElements); auto newSize = InitialNumElements; while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) { newSize *= 2; } if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { throwOverflowError(); } rehashPowerOfTwo(newSize); } size_type size() const noexcept { // NOLINT(modernize-use-nodiscard) ROBIN_HOOD_TRACE(this) return mNumElements; } size_type max_size() const noexcept { // NOLINT(modernize-use-nodiscard) ROBIN_HOOD_TRACE(this) return static_cast(-1); } ROBIN_HOOD(NODISCARD) bool empty() const noexcept { ROBIN_HOOD_TRACE(this) return 0 == mNumElements; } float max_load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) ROBIN_HOOD_TRACE(this) return MaxLoadFactor100 / 100.0F; } // Average number of elements per bucket. Since we allow only 1 per bucket float load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) ROBIN_HOOD_TRACE(this) return static_cast(size()) / static_cast(mMask + 1); } ROBIN_HOOD(NODISCARD) size_t mask() const noexcept { ROBIN_HOOD_TRACE(this) return mMask; } ROBIN_HOOD(NODISCARD) size_t calcMaxNumElementsAllowed(size_t maxElements) const noexcept { if (ROBIN_HOOD_LIKELY(maxElements <= (std::numeric_limits::max)() / 100)) { return maxElements * MaxLoadFactor100 / 100; } // we might be a bit inprecise, but since maxElements is quite large that doesn't matter return (maxElements / 100) * MaxLoadFactor100; } ROBIN_HOOD(NODISCARD) size_t calcNumBytesInfo(size_t numElements) const noexcept { // we add a uint64_t, which houses the sentinel (first byte) and padding so we can load // 64bit types. 
return numElements + sizeof(uint64_t); } ROBIN_HOOD(NODISCARD) size_t calcNumElementsWithBuffer(size_t numElements) const noexcept { auto maxNumElementsAllowed = calcMaxNumElementsAllowed(numElements); return numElements + (std::min)(maxNumElementsAllowed, (static_cast(0xFF))); } // calculation only allowed for 2^n values ROBIN_HOOD(NODISCARD) size_t calcNumBytesTotal(size_t numElements) const { #if ROBIN_HOOD(BITNESS) == 64 return numElements * sizeof(Node) + calcNumBytesInfo(numElements); #else // make sure we're doing 64bit operations, so we are at least safe against 32bit overflows. auto const ne = static_cast(numElements); auto const s = static_cast(sizeof(Node)); auto const infos = static_cast(calcNumBytesInfo(numElements)); auto const total64 = ne * s + infos; auto const total = static_cast(total64); if (ROBIN_HOOD_UNLIKELY(static_cast(total) != total64)) { throwOverflowError(); } return total; #endif } private: template ROBIN_HOOD(NODISCARD) typename std::enable_if::value, bool>::type has(const value_type& e) const { ROBIN_HOOD_TRACE(this) auto it = find(e.first); return it != end() && it->second == e.second; } template ROBIN_HOOD(NODISCARD) typename std::enable_if::value, bool>::type has(const value_type& e) const { ROBIN_HOOD_TRACE(this) return find(e) != end(); } // reserves space for at least the specified number of elements. // only works if numBuckets if power of two void rehashPowerOfTwo(size_t numBuckets) { ROBIN_HOOD_TRACE(this) Node* const oldKeyVals = mKeyVals; uint8_t const* const oldInfo = mInfo; const size_t oldMaxElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); // resize operation: move stuff init_data(numBuckets); if (oldMaxElementsWithBuffer > 1) { for (size_t i = 0; i < oldMaxElementsWithBuffer; ++i) { if (oldInfo[i] != 0) { insert_move(std::move(oldKeyVals[i])); // destroy the node but DON'T destroy the data. 
oldKeyVals[i].~Node(); } } // don't destroy old data: put it into the pool instead DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer)); } } ROBIN_HOOD(NOINLINE) void throwOverflowError() const { #if ROBIN_HOOD(HAS_EXCEPTIONS) throw std::overflow_error("robin_hood::map overflow"); #else abort(); #endif } void init_data(size_t max_elements) { mNumElements = 0; mMask = max_elements - 1; mMaxNumElementsAllowed = calcMaxNumElementsAllowed(max_elements); auto const numElementsWithBuffer = calcNumElementsWithBuffer(max_elements); // calloc also zeroes everything mKeyVals = reinterpret_cast(detail::assertNotNull( calloc(1, calcNumBytesTotal(numElementsWithBuffer)))); mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); // set sentinel mInfo[numElementsWithBuffer] = 1; mInfoInc = InitialInfoInc; mInfoHashShift = InitialInfoHashShift; } template typename std::enable_if::value, Q&>::type doCreateByKey(Arg&& key) { while (true) { size_t idx{}; InfoType info{}; keyToIdx(key, &idx, &info); nextWhileLess(&info, &idx); // while we potentially have a match. Can't do a do-while here because when mInfo is // 0 we don't want to skip forward while (info == mInfo[idx]) { if (WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { // key already exists, do not insert. return mKeyVals[idx].getSecond(); } next(&info, &idx); } // unlikely that this evaluates to true if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) { increase_size(); continue; } // key not found, so we are now exactly where we want to insert it. auto const insertion_idx = idx; auto const insertion_info = info; if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { mMaxNumElementsAllowed = 0; } // find an empty spot while (0 != mInfo[idx]) { next(&info, &idx); } auto& l = mKeyVals[insertion_idx]; if (idx == insertion_idx) { // put at empty spot. This forwards all arguments into the node where the object // is constructed exactly where it is needed. 
::new (static_cast(&l)) Node(*this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), std::forward_as_tuple()); } else { shiftUp(idx, insertion_idx); l = Node(*this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), std::forward_as_tuple()); } // mKeyVals[idx].getFirst() = std::move(key); mInfo[insertion_idx] = static_cast(insertion_info); ++mNumElements; return mKeyVals[insertion_idx].getSecond(); } } // This is exactly the same code as operator[], except for the return values template std::pair doInsert(Arg&& keyval) { while (true) { size_t idx{}; InfoType info{}; keyToIdx(getFirstConst(keyval), &idx, &info); nextWhileLess(&info, &idx); // while we potentially have a match while (info == mInfo[idx]) { if (WKeyEqual::operator()(getFirstConst(keyval), mKeyVals[idx].getFirst())) { // key already exists, do NOT insert. // see http://en.cppreference.com/w/cpp/container/unordered_map/insert return std::make_pair(iterator(mKeyVals + idx, mInfo + idx), false); } next(&info, &idx); } // unlikely that this evaluates to true if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) { increase_size(); continue; } // key not found, so we are now exactly where we want to insert it. 
auto const insertion_idx = idx; auto const insertion_info = info; if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { mMaxNumElementsAllowed = 0; } // find an empty spot while (0 != mInfo[idx]) { next(&info, &idx); } auto& l = mKeyVals[insertion_idx]; if (idx == insertion_idx) { ::new (static_cast(&l)) Node(*this, std::forward(keyval)); } else { shiftUp(idx, insertion_idx); l = Node(*this, std::forward(keyval)); } // put at empty spot mInfo[insertion_idx] = static_cast(insertion_info); ++mNumElements; return std::make_pair(iterator(mKeyVals + insertion_idx, mInfo + insertion_idx), true); } } bool try_increase_info() { ROBIN_HOOD_LOG("mInfoInc=" << mInfoInc << ", numElements=" << mNumElements << ", maxNumElementsAllowed=" << calcMaxNumElementsAllowed(mMask + 1)) if (mInfoInc <= 2) { // need to be > 2 so that shift works (otherwise undefined behavior!) return false; } // we got space left, try to make info smaller mInfoInc = static_cast(mInfoInc >> 1U); // remove one bit of the hash, leaving more space for the distance info. // This is extremely fast because we can operate on 8 bytes at once. ++mInfoHashShift; auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); for (size_t i = 0; i < numElementsWithBuffer; i += 8) { auto val = unaligned_load(mInfo + i); val = (val >> 1U) & UINT64_C(0x7f7f7f7f7f7f7f7f); std::memcpy(mInfo + i, &val, sizeof(val)); } // update sentinel, which might have been cleared out! mInfo[numElementsWithBuffer] = 1; mMaxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); return true; } void increase_size() { // nothing allocated yet? 
just allocate InitialNumElements if (0 == mMask) { init_data(InitialNumElements); return; } auto const maxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); if (mNumElements < maxNumElementsAllowed && try_increase_info()) { return; } ROBIN_HOOD_LOG("mNumElements=" << mNumElements << ", maxNumElementsAllowed=" << maxNumElementsAllowed << ", load=" << (static_cast(mNumElements) * 100.0 / (static_cast(mMask) + 1))) // it seems we have a really bad hash function! don't try to resize again if (mNumElements * 2 < calcMaxNumElementsAllowed(mMask + 1)) { throwOverflowError(); } rehashPowerOfTwo((mMask + 1) * 2); } void destroy() { if (0 == mMask) { // don't deallocate! return; } Destroyer::value>{} .nodesDoNotDeallocate(*this); // This protection against not deleting mMask shouldn't be needed as it's sufficiently // protected with the 0==mMask check, but I have this anyways because g++ 7 otherwise // reports a compile error: attempt to free a non-heap object ‘fm’ // [-Werror=free-nonheap-object] if (mKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { free(mKeyVals); } } void init() noexcept { mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); mInfo = reinterpret_cast(&mMask); mNumElements = 0; mMask = 0; mMaxNumElementsAllowed = 0; mInfoInc = InitialInfoInc; mInfoHashShift = InitialInfoHashShift; } // members are sorted so no padding occurs Node* mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); // 8 byte 8 uint8_t* mInfo = reinterpret_cast(&mMask); // 8 byte 16 size_t mNumElements = 0; // 8 byte 24 size_t mMask = 0; // 8 byte 32 size_t mMaxNumElementsAllowed = 0; // 8 byte 40 InfoType mInfoInc = InitialInfoInc; // 4 byte 44 InfoType mInfoHashShift = InitialInfoHashShift; // 4 byte 48 // 16 byte 56 if NodeAllocator }; } // namespace detail // map template , typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> using unordered_flat_map = detail::Table; template , typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> 
using unordered_node_map = detail::Table; template , typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> using unordered_map = detail::Table) <= sizeof(size_t) * 6 && std::is_nothrow_move_constructible>::value && std::is_nothrow_move_assignable>::value, MaxLoadFactor100, Key, T, Hash, KeyEqual>; // set template , typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> using unordered_flat_set = detail::Table; template , typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> using unordered_node_set = detail::Table; template , typename KeyEqual = std::equal_to, size_t MaxLoadFactor100 = 80> using unordered_set = detail::Table::value && std::is_nothrow_move_assignable::value, MaxLoadFactor100, Key, void, Hash, KeyEqual>; } // namespace robin_hood #endif megadepth-1.2.0/run.sh000077500000000000000000000000701420302544700146110ustar00rootroot00000000000000#!/usr/bin/env bash docker run -v`pwd`:/code megadepth megadepth-1.2.0/run_hbb_build_container.sh000077500000000000000000000004271420302544700206530ustar00rootroot00000000000000#for interactive debugging switch to just bash shell #docker run --ulimit core=-1 -it --rm --volume `pwd`:/build:rw --entrypoint=/bin/bash de2c52df2597 -i docker run --ulimit core=-1 -it --rm --volume `pwd`:/build:rw --entrypoint=/build/build_no_container_hbb.sh de2c52df2597 -i megadepth-1.2.0/run_macos_build_container.sh000077500000000000000000000003321420302544700212150ustar00rootroot00000000000000#docker run --ulimit core=-1 -it --volume `pwd`:/build:rw --entrypoint=/bin/bash osxcross_gcc -i docker run --ulimit core=-1 -it --volume `pwd`:/build:rw --entrypoint=/build/build_no_container_macos.sh osxcross_gcc -i megadepth-1.2.0/run_windows_build_container.sh000077500000000000000000000003341420302544700216070ustar00rootroot00000000000000#docker run --ulimit core=-1 -it --volume `pwd`:/build:rw --entrypoint=/bin/bash a3939c35343f -i docker run --ulimit core=-1 -it --volume `pwd`:/build:rw 
--entrypoint=/build/build_no_container_windows.sh a3939c35343f -i megadepth-1.2.0/sample_aggregation/000077500000000000000000000000001420302544700173015ustar00rootroot00000000000000megadepth-1.2.0/sample_aggregation/Snakefile.paste000066400000000000000000000122101420302544700222340ustar00rootroot00000000000000#start import sys import os import glob #config params: #+) input= #+) sample_ids_file= #optional: #+) existing_sums= #+) pigz_threads=<#_of_threads_to_use_for_final_pasted_file_compression> #+) prefix= FILES=['%sall.samples.pasted.gz' % config['prefix']] main_script_path=os.path.join(workflow.basedir) SCRIPTS={'find':os.path.join(main_script_path,'find_new_files.sh'),'group':os.path.join(main_script_path,'group_sums.sh'),'paste':os.path.join(main_script_path,'paste_sums.sh')} #initial coords/annotations (e.g. exons.bed.w_header.gz in Monorail) if 'existing_sums' not in config: config['existing_sums']="" #how many threads to allow pigz when doing the final single file compression if 'pigz_threads' not in config: config['pigz_threads']=4 wildcard_constraints: study_group_num="[0-9a-zA-Z]{2}", run_group_num="[0-9a-zA-Z]{2}", #prefix example: 'dn.' 
(for APA sites) or empty string type=config['prefix']+"all" rule all: input: expand("{file}", file=FILES) ###exon SUM pasting rules rule find_sums: input: config['input'], config['sample_ids_file'] output: config['staging'] + '/{type}.groups.manifest' params: staging=config['staging'], script_path=SCRIPTS['find'], type=lambda wildcards: wildcards.type shell: "{params.script_path} {input[0]} {input[1]} {params.staging} {params.type} .tsv" rule group_sums: input: config['staging'] + '/{type}.groups.manifest' output: config['staging'] + '/{type}.{study_group_num}.{run_group_num}.grouped' params: study_group_num=lambda wildcards: wildcards.study_group_num, run_group_num=lambda wildcards: wildcards.run_group_num, staging=config['staging'], script_path=SCRIPTS['group'], type=lambda wildcards: wildcards.type shell: "{params.script_path} {params.staging}/{params.type}.{params.study_group_num}.{params.run_group_num}.manifest {output} convert_to_int" #do a rule instantiation per *run* low-order name grouping to do hierarchical pastes rule paste_sums_per_group: input: config['staging'] + '/{type}.{study_group_num}.{run_group_num}.grouped' output: config['staging'] + '/{type}.{study_group_num}.{run_group_num}.pasted' params: study_group_num=lambda wildcards: wildcards.study_group_num, run_group_num=lambda wildcards: wildcards.run_group_num, staging=config['staging'], script_path=SCRIPTS['paste'], type=lambda wildcards: wildcards.type, pigz_threads=0 shell: "{params.script_path} {params.staging}/{params.type}.{params.study_group_num}.{params.run_group_num}.manifest {output} {params.pigz_threads}" def get_pasted_sum_files(wildcards): study_loworder = wildcards.study_group_num fin = open(config['sample_ids_file'], "r") lines = fin.read().split('\n') fin.close() return [config['staging']+"/%s.%s.%s.pasted" % (wildcards.type, f.split('\t')[0][-2:], f.split('\t')[1][-2:]) for f in lines[:-1]] #return [config['staging']+"/%s.%s.%s.pasted" % (wildcards.type, f.split('/')[-3], 
f.split('/')[-1]) for f in glob.glob(config['input']+'/%s/*??' % (study_loworder))] rule collect_pasted_sums: input: get_pasted_sum_files output: config['staging'] + '/{type}.{study_group_num}.pasted.files.list' params: study_group_num=lambda wildcards: wildcards.study_group_num, staging=config['staging'], type=lambda wildcards: wildcards.type shell: "ls {params.staging}/{params.type}.{params.study_group_num}.??.pasted > {output}" rule paste_sums_per_study_group: input: config['staging'] + '/{type}.{study_group_num}.pasted.files.list' output: os.path.join(config['staging'], '{type}.{study_group_num}.pasted') params: study_group_num=lambda wildcards: wildcards.study_group_num, staging=config['staging'], script_path=SCRIPTS['paste'], existing_sums=config['existing_sums'], type=lambda wildcards: wildcards.type, pigz_threads=0 shell: "{params.script_path} {input} {output} {params.pigz_threads} dont_get_ids" def get_study_pasted_sum_files(wildcards): fin = open(config['sample_ids_file'], "r") lines = fin.read().split('\n') fin.close() return [config['staging']+"/%s.%s.pasted" % (wildcards.type, f.split('\t')[0][-2:]) for f in lines[:-1]] #return [config['staging']+"/%s.%s.pasted" % (wildcards.type, f.split('/')[-1]) for f in glob.glob(config['input']+'/??')] rule collect_study_pasted_sums: input: get_study_pasted_sum_files output: config['staging'] + '/{type}.groups.pasted.files.list' params: staging=config['staging'], type=lambda wildcards: wildcards.type shell: "ls {params.staging}/{params.type}.??.pasted > {output}" rule paste_sums_final: input: config['staging'] + '/{type}.groups.pasted.files.list' output: '{type}.samples.pasted.gz' params: staging=config['staging'], script_path=SCRIPTS['paste'], existing_sums=config['existing_sums'], type=lambda wildcards: wildcards.type, pigz_threads = config['pigz_threads'] shell: "{params.script_path} {input} {output} {params.pigz_threads} dont_get_ids {params.existing_sums}" 
megadepth-1.2.0/sample_aggregation/aggregate.sh000066400000000000000000000040411420302544700215620ustar00rootroot00000000000000#!/usr/bin/env bash set -xe sdir=$(dirname $0) mkdir -p paste mkdir -p runs /bin/bash -x ${sdir}/find_new_files.sh ./sums srav3h.ids.all.final.tsv ./paste all .tsv > find.run 2>&1 & ls paste/all.??.??.manifest | perl -ne 'chomp; print "'${sdir}'/remove_extra_samples.sh $_\n";' > remove_extra_samples.jobs /bin/bash -x remove_extra_samples.randoms100.jobs cut -f 1-2 srav3h.ids.all.final.tsv | perl -ne 'chomp; ($study,$run)=split(/\t/,$_); $study=~/(..)$/; $s=$1; $run=~/(..)$/; $r=$1; print "/bin/bash -x '${sdir}'/group_sums.sh paste/all.$s.$r.manifest paste/all.$s.$r.grouped convert_to_int > runs/all.$s.$r.grouped.run 2>&1\n";' | sort -u > group_sums.u.jobs /usr/bin/time -v parallel -j 33 < group_sums.u.jobs > group_sums.u.jobs.run 2>&1 cut -f 1-2 srav3h.ids.all.final.tsv | perl -ne 'chomp; ($study,$run)=split(/\t/,$_); $study=~/(..)$/; $s=$1; $run=~/(..)$/; $r=$1; print "/bin/bash -x '${sdir}'/paste_sums.sh paste/all.$s.$r.manifest paste/all.$s.$r.pasted 0 > runs/all.$s.$r.pasted.run 2>&1\n";' | sort -u > paste_sums_per_group.u.jobs /usr/bin/time -v parallel -j33 < paste_sums_per_group.u.jobs > paste_sums_per_group.u.jobs.run 2>&1 cut -f 1-2 srav3h.ids.all.final.tsv | perl -ne 'chomp; ($study,$run)=split(/\t/,$_); $study=~/(..)$/; $s=$1; $run=~/(..)$/; $r=$1; print "$s\t$r\n";' | sort -u | perl -ne 'chomp; ($s,$r)=split(/\t/,$_); print "ls paste/all.$s.??.pasted > paste/all.$s.pasted.files.list\n";' | sort -u > ls.paste.u.jobs /bin/bash -x ls.paste.jobs.u > ls.paste.jobs.u.run 2>&1 ls paste/*.list | perl -ne 'chomp; $f=$_; $f=~/all\.(..)\.pasted\.files\.list/; $s=$1; print "/bin/bash -x '${sdir}'/paste_sums.sh $f paste/all.$s.pasted 0 dont_get_ids > runs/all.$s.pasted.run 2>&1\n";' | sort -u > paste_sums_per_study_group.u.jobs /usr/bin/time -v parallel -j33 < paste_sums_per_study_group.u.jobs > paste_sums_per_study_group.u.jobs.run 2>&1 ls 
paste/all.??.pasted > paste/all.groups.pasted.files.list /usr/bin/time -v /bin/bash -x ${sdir}/paste_sums.sh paste/all.groups.pasted.files.list all.samples.pasted.gz 8 dont_get_ids ryten.random.bed megadepth-1.2.0/sample_aggregation/find_new_files.sh000077500000000000000000000027201420302544700226140ustar00rootroot00000000000000#!/bin/bash set -o pipefail -o errexit #top level of incoming dir #(where fully done analyses were copied/symlinked from original set of attempts) search_dir=$1 #map between external sample accessions/UUIDs and study format: studysamplesecondary_sample_id (or just sample) sample_ids_file=$2 destination_dir=$3 #e.g. "all" analysis_type=$4 #.e.g ".tsv" additional_suffix=$5 #now uses both study loworder digits *and* run loworder digits in manifest file names find -L $search_dir -name "*.${analysis_type}${additional_suffix}" -size +0 | perl -ne 'BEGIN { open(IN,"<'${sample_ids_file}'"); %rids; while($line=) { chomp($line); @f=split(/\t/,$line); $rid=$f[2]; $run_acc=$f[1]; $rids{$run_acc}=[$rid, $f[0]]; } close(IN); } chomp; $f=$_; @f=split(/\//,$f); $fname=pop(@f); `pushd '${destination_dir}' && ln -fs ../$f && popd`; $fname=~/^([^\.]+)\./; $run_acc=$1; $run_acc=~/(..)$/; $acc_loworder=$1; ($rid, $study)=@{$rids{$run_acc}}; $study=~/(..)$/; $study_loworder=$1; push(@{$h{$study_loworder.".".$acc_loworder}},["'${destination_dir}'/$fname",$rid,"'${destination_dir}'/$fname",$run_acc]); END { open(ALL_OUT,">'${destination_dir}'/'${analysis_type}'.groups.manifest"); for $k (keys %h) { open(OUT,">'${destination_dir}'/'${analysis_type}'.$k.manifest"); for $a (@{$h{$k}}) { print ALL_OUT "".$a->[3]."\t'${analysis_type}'.$k.manifest\n"; print OUT "".join("\t",@$a)."\n"; } close(OUT); } close(ALL_OUT); }' #touch $destination_dir/${analysis_type}.groups.manifest megadepth-1.2.0/sample_aggregation/group_sums.sh000077500000000000000000000013701420302544700220440ustar00rootroot00000000000000#!/bin/bash -x set -o pipefail -o nounset -o errexit manifest=$1 
output_sentinel=$2 convert_to_int=$3 #run one group job at a time, but run it for all of the files listed in the manifest if [[ -n $convert_to_int ]]; then #cut -f 3 $manifest | xargs -n 1 -P 1 -I{} sh -c 'cut -f 4 $1 | sed "s/\.0*$//" > ${1}.unc' -- {} cut -f 3 $manifest | xargs -n 1 -P 1 -I{} sh -c 'cat $1 | sed "s/\.0*$//" > ${1}.unc' -- {} else #cut -f 3 $manifest | xargs -n 1 -P 1 -I{} sh -c 'cut -f 4 $1 > ${1}.unc' -- {} cut -f 3 $manifest | xargs -n 1 -P 1 -I{} sh -c 'cat $1 $1 > ${1}.unc' -- {} fi cut -f 3 $manifest | perl -ne 'chomp; $f=$_.".unc"; @s=stat($f); if($s[7]==0) { `cp blank_exon_sums $f`;}' #when done, write a "done" sentinel touch $output_sentinel megadepth-1.2.0/sample_aggregation/paste_sums.sh000077500000000000000000000053071420302544700220300ustar00rootroot00000000000000#!/bin/bash set -o pipefail -o errexit manifest=$1 output=$2 threads=$3 dont_get_ids=$4 existing_sums=$5 #[optional] number of threads to use when pigz'ing output, default 4 if [[ -z $threads ]]; then threads=1 fi if [ -n "$dont_get_ids" ]; then #if set, we're doing a final paste of all previously pasted (or copied) sample groups so no need to handle sample IDs #for debugging, dont delete pasted intermediates cat $manifest | perl -ne 'chomp; $f=$_; $s.=" $f"; $c++; END { if($c > 1) { print "paste $s\n"; `paste $s > '${output}'`; } else { `cat $f > '${output}'`; }}' #cat $manifest | perl -ne 'chomp; $f=$_; $s.=" $f"; $c++; END { if($c > 1) { print "paste $s\n"; `paste $s > '${output}'`; `rm $s`; } else { `cat $f > '${output}'`; }}' #cat $manifest | perl -ne 'chomp; $f=$_; $s.=" $f"; $c++; END { if($c > 1) { print "paste $s\n"; `paste $s > '${output}'`; `rm $s`; } else { `cat $f > '${output}'`; `rm $f`; }}' else #here we need to make sure we output the correct order of samples IDs as the column header #acc_header is an env var #optional flag to switch to accessions instad of rail_ids as the header if [[ -z $acc_header ]]; then cut -f 2,3 $manifest | perl -ne 'chomp; 
($rid,$f)=split(/\t/,$_); $h.="$rid\t"; $s.=" $f.unc"; $c++; END { $h=~s/\t$//; open(OUT,">'${output}'"); print OUT "$h\n"; close(OUT); if($c > 1) { print "paste $s\n"; `paste $s >> '${output}'`; } else { `cat $f.unc >> '${output}'`; }}' #cut -f 2,3 $manifest | perl -ne 'chomp; ($rid,$f)=split(/\t/,$_); $h.="$rid\t"; $s.=" $f.unc"; $c++; END { $h=~s/\t$//; open(OUT,">'${output}'"); print OUT "$h\n"; close(OUT); if($c > 1) { print "paste $s\n"; `paste $s >> '${output}'`; `rm $s`; } else { `cat $f.unc >> '${output}'`; `rm $f.unc`; }}' else cut -f 3,4 $manifest | perl -ne 'chomp; ($f,$rid)=split(/\t/,$_); $h.="$rid\t"; $s.=" $f.unc"; $c++; END { $h=~s/\t$//; open(OUT,">'${output}'"); print OUT "$h\n"; close(OUT); if($c > 1) { print "paste $s\n"; `paste $s >> '${output}'`; } else { `cat $f.unc >> '${output}'`; }}' #cut -f 3,4 $manifest | perl -ne 'chomp; ($f,$rid)=split(/\t/,$_); $h.="$rid\t"; $s.=" $f.unc"; $c++; END { $h=~s/\t$//; open(OUT,">'${output}'"); print OUT "$h\n"; close(OUT); if($c > 1) { print "paste $s\n"; `paste $s >> '${output}'`; `rm $s`; } else { `cat $f.unc >> '${output}'`; `rm $f.unc`; }}' fi fi if [ -n "$existing_sums" ]; then mv ${output} ${output}.pre_existing if [[ $threads -eq 0 ]]; then #assumes if we dont want to compress the output then the intermediate input is no compressed either paste ${existing_sums} ${output}.pre_existing > ${output} else paste ${existing_sums} ${output}.pre_existing | pigz --fast -p $threads > ${output} fi fi megadepth-1.2.0/sample_aggregation/remove_extra_samples.sh000077500000000000000000000001061420302544700240610ustar00rootroot00000000000000f=$1 export LC_ALL=C fgrep -v " " $f | sort -u > $f.new mv $f.new $f megadepth-1.2.0/tests/000077500000000000000000000000001420302544700146135ustar00rootroot00000000000000megadepth-1.2.0/tests/TCGA_BLCA_A13J.vcf.gz_cg_cov5.bw.bg.gz.chr1.60379.62229.bw000066400000000000000000000311351420302544700254110ustar00rootroot00000000000000&X0@O?? 
@xH?xchr1xc```pa ,6$~4TW*;"Hh$&xc```U{U6AO %h$//'&megadepth-1.2.0/tests/TCGA_BLCA_A13J_vs_chr1.61863.62160.annotation.tsv000066400000000000000000000002321420302544700243240ustar00rootroot00000000000000chr1 59056 59208 0.00 chr1 60379 60686 0.00 chr1 61604 61862 0.00 chr1 61863 62160 2.27 chr1 62161 62229 0.00 chr1 64426 64666 0.00 chr1 65977 66134 0.00 megadepth-1.2.0/tests/build_test_data.sh000077500000000000000000000027731420302544700203120ustar00rootroot00000000000000#!/usr/bin/env bash export LD_LIBRARY_PATH=/data7/megadepth/libBigWig:/data7/megadepth/htslib:$LD_LIBRARY_PATH MD=../mosdepth BT=~/bwtool_mine2/bwtool B2B='python3 ../bam2bigwig.py' KB2B=/data/kent_tools/bigWigToBedGraph WT=wiggletools EXONS=/data7/megadepth/exons.bed EXONS=$1 BAM=$2 echo -n "" > ${BAM}.per-base.bed.gz.bw.aucs echo -n "" > ${BAM}.annotated_aucs for t in default unique; do qarg=`perl -e '$t='${t}'; print "0" if($t eq "default"); print "10" if($t eq "unique");'` time $MD -F260 -t 4 -Q${qarg} ${BAM}.${t} $BAM time cat <(samtools view -H $BAM) <(zcat ${BAM}.${t}.per-base.bed.gz) | $B2B ${BAM}.${t}.per-base.bed.gz.bw time $BT summary $EXONS ${BAM}.${t}.per-base.bed.gz.bw /dev/stdout -fill=0 -with-sum -keep-bed -decimals=0 | cut -f1-3,10 | sort -k1,1 -k2,2n -k3,3n > ${BAM}.${t}.per-base.bed.gz.bw.sums.bed $KB2B ${BAM}.${t}.per-base.bed.gz.bw ${BAM}.${t}.per-base.bed.gz.bw.sums.bed.bg $WT AUC ${BAM}.${t}.per-base.bed.gz.bw | perl -ne 'BEGIN { $t='${t}'; } chomp; $f=$_; $f=~s/\.0+$//; print "ALL_READS_ALL_BASES\t$f\n" if($t eq "default"); print "UNIQUE_READS_ALL_BASES\t$f\n" if($t eq "unique");' >> ${BAM}.per-base.bed.gz.bw.aucs cat ${BAM}.${t}.per-base.bed.gz.bw.sums.bed | perl -ne 'BEGIN { $t='${t}'; } chomp; ($c,$s,$e,$v)=split(/\t/,$_); $f+=$v; END { print "ALL_READS_ANNOTATED_BASES\t$f\n" if($t eq "default"); print "UNIQUE_READS_ANNOTATED_BASES\t$f\n" if($t eq "unique"); }' >> ${BAM}.annotated_aucs done cat ${BAM}.per-base.bed.gz.bw.aucs >> ${BAM}.annotated_aucs 
megadepth-1.2.0/tests/bw.all_overlap_types.test_input.bw000066400000000000000000000454031420302544700235030ustar00rootroot00000000000000&X0 \(22/@w??A ,@R@xchr1=Cxc```gg-;02;%@\Dsv Zv DU!j~a.h$====Jxc```g.–].l3ݗLk;eŖ] *~YnC'^$h$==\==Bxc```gu->]NgQ;J qh$==2==2'&megadepth-1.2.0/tests/bw.all_overlap_types.test_output.bed000066400000000000000000000003201420302544700240130ustar00rootroot00000000000000chr1 121378685 121378704 0.00 chr1 121478687 121478800 0.32 chr1 121479000 121481992 12.42 chr1 121480203 121480221 5.79 chr1 121481830 121482451 4.89 chr1 121483258 121483259 0.32 AUC_ANNOTATED_BASES 23.738 megadepth-1.2.0/tests/check_overlaps.pl000077500000000000000000000027621420302544700201520ustar00rootroot00000000000000#!/usr/bin/env perl use strict; use warnings; my $target_bg=shift; my $coordsF=shift; #e.g. wgEncodeHaibTfbsH1hescSp1Pcr1xRawRep1.bg.bz open(IN,"<$coordsF"); my $total_sum = 0; while(my $line1=) { chomp($line1) ; my ($c,$s,$e,$n)=split(/\t/,$line1); my $output=`tabix $target_bg $c:$s-$e`; my @output = split(/\n/,$output); if(scalar(@output) == 0) { print STDERR "no matches for $line1, skipping\n"; next; } my $local_sum = 0; while(my $line2=shift(@output)) { chomp($line2); my ($c2,$s2,$e2,$v)=split(/\t/,$line2); #left side of annotation overhangs if($s < $s2 && $e <= $e2) { $local_sum+= ($e - $s2)*$v; } #right side of annotation overhangs elsif($s >= $s2 && $e > $e2) { $local_sum+= ($e2 - $s)*$v; } #target either exactly matches or is strictly contained in annotation elsif($s <= $s2 && $e >= $e2) { $local_sum+= ($e2 - $s2)*$v; } #annotation either exactly matches or is strictly contained in target elsif($s >= $s2 && $e <= $e2) { $local_sum+= ($e - $s)*$v; } else { print STDERR "ran out of matching possibilities $c:$s-$e (annotation) vs. 
$c2:$s2-$e2 (target)\n"; next; } } print "$line1\t$local_sum\n"; $total_sum += $local_sum; } close(IN); print "total_sum\t$total_sum\n"; megadepth-1.2.0/tests/chr1.61863.62160.bad_chrm_order.bed000066400000000000000000000003431420302544700222200ustar00rootroot00000000000000chr1 59056 59208 L2a_63 441 - chr2 60379 60686 MER47A_64 1529 - chr1 61604 61862 L1ME4c_65 439 - chr1 61863 62160 AluSc_66 2412 - chr1 62161 62229 L1ME4c_67 439 - chr1 64426 64666 L1PA4_68 1994 + chr1 65977 66134 MIR3_69 306 - megadepth-1.2.0/tests/chr1.61863.62160.bed000066400000000000000000000003431420302544700172670ustar00rootroot00000000000000chr1 59056 59208 L2a_63 441 - chr1 60379 60686 MER47A_64 1529 - chr1 61604 61862 L1ME4c_65 439 - chr1 61863 62160 AluSc_66 2412 - chr1 62161 62229 L1ME4c_67 439 - chr1 64426 64666 L1PA4_68 1994 + chr1 65977 66134 MIR3_69 306 - megadepth-1.2.0/tests/gh_bug_9.bed000066400000000000000000000004711420302544700167540ustar00rootroot00000000000000chr1 121378685 121378704 no_overlap chr1 121478687 121478800 annotation_overhangs_right_side chr1 121479000 121481992 annotation_overhangs_left_side chr1 121480203 121480221 exact_match chr1 121481830 121482451 annotation_strictly_contains_target chr1 121483258 121483259 annotation_fully_contained_within_target megadepth-1.2.0/tests/long_reads.bam000066400000000000000000000270131420302544700174140ustar00rootroot00000000000000BC?sred``p J(22b 1HH10Cr=*BC-|KoږߏIeQI6d"EI=ΨqзQ& : dA982" @F|kSR۸tUɽ[^?W}bםkͷk>m/n^~uwrxy ׿>w;>Ë~?3~7>߯~_?%'ףem߾wC?X,+`H2pUMg_Zoؓա:@*b=]vL{h &I=Ys[gPʲK&4gHk) %͒qHfJiI=/$i*p0à=@ ,ߠ0irÁ@nccd "9pe9(xٲ0IH3L4>Pd6 Yb,sL9uwh:}YK8dCr\#k@k;H"a۴eHB5rt0 qP2 B&&BHn2t7Slв K5 {f#/#(qw[B"]ԁ*./MEe+&b3 Xzj+ >p- ٱtF`6P^IR r8arU-VOr_Pn2lPA*8pd ; (Ah3PJQqQ\Y[tz\z_ࣃ>,ԌJTɶdumYabQm2`"φ wDG- 6d8a>LP5b8j %(_8=5I$෾2>l Aw Awl8@‘ -"P%u"[H Mcpci+i HcU7 +Kb7PN_\僁n}x j!D2bhC샑KfuV?;20a.ڐCF^]0ƛcam%JlBFI r& !b#=\5S\ԑD3`,,,ʐS(fAdcq@X*2>]A_C"T[ &itfBj}Gqp,&B 
G{Ü2bVܐCe2L)YT>=\YR5l!SNwh5wҥnM/LR&oPaJ1ǿWbu&|U^WɌ>$S{8M1SJe(ҍͻ%M3j=YizuYlDwy5䪟\zl=Do *iD0^cQ_kRxe TN[n0zȤ*Vr]zvx9T,9JGj%Ԥ4Y]]6ءpἚX ?n2AD m|K^ 3f' >XlFPi7AF'`|:1ӥ*~(_Fv6k\hb3qwpv.tYT;j9) -BԠtlJ#R{Mvr)R!ES1FFFMHLrBz (դ٥ZkRWuʩT${$4WKT]FZC T\E;vi"m' %FoN1)$%&-r eK+8lRYxn'v}ӏ'̢L@/z!e(\y&fĈBEv ةuKH2bvVt_-GPvNR պU6TQhfhB ,XmcrA/2cW2>pjnMJeHH6]JFJ( 2wԡ|1y?viTI:EZ'^>0fu#{g_1gvUjI`Ƚnr=RHt5q]YJJ_^%u}PҥK"N=J׵}6.fҬ5/;' S<6m25P 1.7 {p"!=P>aJըU#@xJwOp^jr`9\K ;٧:>bkZNhD4)TF@njT;l?BuFv,}6jtiݥs8 h(مfU2= o8"X/?ޢ6ݗ5{ j]0)BCmfo T .u.2VbiNJmتRr|7?*ֻ8VwƫiDmz/nW/Ce Zv*P.!yϴ$mDm"9EӴxB}P3 Ɠ2KXt^j&[d)ykSV[TdC| [VI];Np[73bNjR v)t1E-Vc͜oל{ZkK*tE,w>'赚3%F U(Tob)t:n": V'Fa)Bv)4( =oh EZr:nWdap,xAISu+/6JX( g膎vX RǪqE/!Zϯhi穦HFK]+9uL/ʀ@-ܹnV1Bj_C8  1> 0˼oQ-X:by.[Rvq/JAҡF$p #2,tjg),9Ds-AZu+hR*ځ|{I?gӧɃS)j/73Ax(N;s8 !9,NW%Ghln,;x)rI9yda2>7[p^,ʟ&'G0Cf^PXc hZ)l½8O`ҙ2t+7)'S7oDr6#[|ADpa|Z#+@,jY <%C^h>P_BE+?r^Hd3fӀ`Ju 6xލZ$5&憚[DwN9x>Ю=ѭeVygѧLYz/\h8O#N9&24ezG_o^ѩNޏ;oyq?^z^kױq8DZc|Nm&w7_hn~픟?N7>o}jޱ_>en)Oop :+?ңL<N SOK,ؚ&O=O6QkZ?k'/q{3r{@lz|Ĺ#ՙzuڐcf7]U!BTӀu7PUX3=="'AI4iP9#UTY ]SMe8-Le|uxbs&iTGF)$ G4 th`P4DMX2ttʅEj̔>T@CN)c!0xI>KBfZ P60G- ^m$X.r=Mf.v3J~5̢A%AH"9}H$(},I A͍b eQ23XP юB@c@=<?"^xSA6%_pӋ!Hqm 6)^#YrI!bA0%TlP !dH=h6"cZ<H<;,MA88,:zH(CÞZi-UTtsY@\9B]9pZ$6y c'K ameq u_t}HeZjmRM2X uA$(A@`NXa@f&Z7(> d P 0W}SY'lx\(MG!yL]m!z+Uo߷G77x JwI[O G6rJA 9`CmׇLѡ uz9$p >y4`QMWAh, *10bbje\0K-<#<,C1[6>C<J.ip^.Uџuz\<,9a{BO/(#]hlziԔ^2;!s $ @G${C"~TVIl f %(>fO`A`Y#x8 u8IЂ ^g *YZWKp2 R u9`kH,"c P C 6 f}Hӆ:d|"nfL_6dta*A uB:ٜ@Ơ0ٱ%v@ 64z4dׅ ^,P,u gk-2ɡ 44B 6Q'굕Ϫ!в#Y/mc=]n4 3epS:Ľ-2РeyŁauPChA2J+;: mM|5C2:_I!,mZ_O@۞ޗWvz@Uh 4{ Lm kr6~8pl{0#.Pg[/꼶(Bky1(X F$aa4`+rk"h'oT_;0Xs8 ~1 wrzRE~eMw;rɉumA_MfuNw,5 N}o>T "tu&w5owӛEfU)ՁĜqF[u#&$n*'0eF=ٓ-|Bfh[EY6mr#.FxD5'i,|mEލ^~.nm -Vs݉>٫~!n6l4H!r94۟E#snji Lv•79A@@4|ACP\| .Q%oѼ#QArB&{75d|EĪFAZw[$iuHu8i!?{7/(@!U4chjH|邸w Q7ڌ 1hW }ώd|s>`tN!sQM/V)/J'+D?p(/cB>h|z hrͿQ] QZ%O+xTFA]H?CKDX\'n'28gpO۬ :ՑRQXK/&DuXYf;AE0x C(X:L<-),F@^8\j݄e2 m` Ӹ!Q9 :ߝ庇yKOqjpL"@d` 1=y& C|>yL̙y:!O@ o"@7VҩzA!bQ9T{orDO5c!"y eyS+ 
sr(" vB~ӣyʮ0£ Q8 w ~(! m!ocTfY(LK nMkam.gJs҆-ŏxDAzLҹͲC2=HWO6(#xVL;aN! pq%:k G n]OXwDm%Kio$0o,^!eG @hG" YBj,k/E/.ss| nm{t@Q6ʪwxsGQt o((y8+c"E N/n ""W湲Eh#EWwAۂZ$O5"Bg/ikfg [ׇxc{m%a& ,lrB^|6 m(՗-h +ᢨ-FrDj\+BE,6]}_)gzW/·E+HonN;G~&x `53*0"tgj[&߾OqOU ]>1t*x=Z7?ؓ(LBt U3zxE•1-b\!yO{ҷvhX\͕;(qYω֌(/6EPҕG{ "w..xygGnT^,+馸hҾXb-/ JȲtipd&C>tMrk)4h+:1UdS.7wE@=&:iT6.sxE!3fQco@[7V>^]B8x~;#2\8;KJDhaM`?ơߤKUhs,17{O?/{-{l-iaG;{3^|S,S+Hҕ&^I;v?m)qZ}Hqm>QANIjk|FueA:]+|v]9~`K><"q.^#"x>p)._\Gh4Q&ue<.}hz5lS 1fdY Wk7c:@B%~m¬@"!m=/<;݄xNG\- )J|z]tRhi#.jof6vH_b=Vi]KQJK*fJ4zjvOhN-sL/n6GKf_h;Oi.߶u5<0TflbvK [!\A{sE*OؑgF"피6su/r1_bxDPE^ZUa۪ @C}F ͇L$_{=nf׉NdsD#]&p<ĘP:h"CmN:bt@hE 0>~@Rdz84l?]N߿t9;++sۙj^f>ZD:D)pDK o)VxIEħ~ݾ|e~'ډ?M \裀{AxQ]-lf Hq={#myszLDsOyKн=$U.;Yo Ymnm9?k)9aцxl {ga̸,?H>Y=[l^wֶt>77O3`[&<.;9BO2]RtxND $4z?lSq%_\8 ڑ%maf.޶ t+A[DF"ѕ ]_4?exam{-,'Oo}{_^R-ekF4ƛo-'M0 ttIܡMB{u~D|J_T/h^OۛAkNj]JFRx,Ug1v'Owx-1l!ne3nc᡻fZ̛6 C$y[DFrsftq}&𓍖.'}%vY^ >\X^8mjU#7T)X]*vN>̽40,/&ɺjKQdF_ʒG.wX#jyvf.>Bwdoa!5>D>no/Ὥ|3x6ٰ幩glXmφٰ տ:?ǧ_<6P1ٓ|5=^5(?+}[_΅#?#~ G*pdaW_EWRѵOBCmegadepth-1.2.0/tests/long_reads.bam.jxs.tsv000066400000000000000000000051551420302544700210350ustar00rootroot00000000000000chr1 1727385 1 0 114S423M1D677M1D321M1D72M1D581M2746N270M1D44M2166N64M1D81M2814N160M511N128M473N136M178N411M1D57M4546N418M1055N73M38S 1729463-1732208,1732524-1734689,1734836-1737649,1737810-1738320,1738449-1738921,1739058-1739235,1739705-1744250,1744669-1745723 1 chr1 1727385 1 0 
83S16M2I27M2I9M1I12M2I4M1I12M2I6M1I12M1I8M1I12M1I6M1I22M2I12M1I5M1I5M1I8M1I5M1I10M1I29M1I10M1I5M1I8M1I7M1I16M2I8M1I21M1I34M1I15M2I6M3I7M1I6M3I13M1I7M1I13M2I10M1I9M1I26M1I16M1I7M2I11M2I14M2I14M1I8M2I9M1I29M1I7M1I11M2I6M1I8M1I34M1I27M1I6M2I13M1I10M1I6M1I21M1I4M1I37M1I8M1I22M1I17M1I35M1I6M2I39M1I13M2I19M1I16M1I11M1I8M1I6M2I17M1I4M1I29M1I53M1I4M1I14M2I28M1I11M1I15M2I16M1I8M1I10M1I20M1I7M1I23M2I11M1I8M1I10M1I31M1I20M1I6M2I8M1I12M1I8M1I5M1I5M1I15M1I5M1I4M2I6M1I7M1I6M1I6M1I10M3I8M2I16M2I15M1I15M1I7M1I16M2I21M1I7M1I19M1I11M1I5M1I18M1I20M1I7M1I6M2I11M1I6M1I9M2I12M1I10M1I9M1I8M1I9M1I5M1I4M1I10M1I14M2I22M1I5M1I22M1I10M2I11M2I8M1I7M1I9M1I7M1I15M2I8M2I7M2I10M1I7M2I22M2I20M1I9M1I12M2I7M1I15M2I8M1I8M2I7M1I9M1I11M1I22M3I12M1I3M1I16M2I11M1I9M1I14M2469N9M5I26M2I12M1I4M1I6M1I24M1I6M2I4M1I20M2I7M1I13M2I17M3I11M1I9M1I9M2I16M1I8M1I6M1I3M1I5M2I14M1I51M3I17M1I5M3I16M1I6M2I9M2I13M3I28M3I17M1I14M1I11M1I14M1I8M1I7M1I17M1I8M4I7M2I7M1I8M2I11M1I6M1I19M2I4M1I21M2I9M1I15M1I18M1I6M1I11M1I3M1I8M1I5M2166N23M1I8M1I5M1I8M2I22M3I31M1I5M1I11M1I6M1I8M2I11M2I8M366N12M1I16M1I7M1I5M1I9M1I4M1I24M1I5M1I17M1I4M1I6M1I14M2I12M1I33M2I5M1I7M2I4M1I5M1I16M1I3M3I12M1I4M1I22M1I19M1I9M2I25M1I7M1I4M1I9M1I18M2I7M1I14M1I21M2I9M1I7M1I7M1I18M2I12M1I12M2I4M1I20M2I31M1I6M2I12M1I11M2I15M1I7M1I17M2I21M3I8M1I10M1I10M1I18M1I7M1I23M1I8M2I16M1I5M1I17M1I12M1I23M2I6M1I5M1I6M2I18M1I8M1I20M1I16M3I9M1I15M1I14M1D5M5I8M1I18M597N25M1I6M1I7M1I8M1I10M1I10M1I11M1I10M2I6M1I13M1I3M2I27M1I18M4I4M2I16M1I34M1I20M2I13M2I35M1I15M2I7M1I24M2I8M3I8M2I9M2I9M1I7M2I20M1I14M2I26M3I22M1I7M1I4M1I15M1I7M2I10M1I6M3I33M1I27M2I5M1I14M1I16M1I17M2I20M1I11M1I8M2I16M3I16M1I9M1I9M1I6M1I4M1I6M2I6M1I14M1I7M1I12M1I8M3I22M2I8M2I21M3I14M1I3M1I13M1I5M1I17M1I8M1I4M1I6M1I5M1I6M2I13M1I9M1I10M1I4M2I11M2I18M2I7M1I22M1I9M1I7M1I10M3I20M2I11M2I2M2I17M1I10M2I16M2I9M2I2M1I11M1I9M511N1I9M1I17M1I14M1I6M2I9M1I12M1I11M1I6M1I5M1I7M1I14M2I14M1I4M473N1M3D5M1I22M1I10M2I8M1I9M2I25M1I8M2I7M1I3M1I6M1I6M1I5M2I7M1I3M1I8M178N10M1I14M1I3M1I17M1I14M1I16M1I13M1I12M1I11M2I24M2I5M1I8M1I7M1I6M1I7M1I14M2
I5M1I10M2I37M2I14M2I7M1I9M1I10M457I6400N6M3I5M1I8M1I8M1I5M2I14M1I10M1I6M1I5M1I17M116S 1729427-1731895,1732524-1734689,1734836-1735201,1736096-1736692,1737810-1738320,1738449-1738921,1739058-1739235,1739509-1745908 1 chr1 1727385 1 0 72S1244M4D17M1I764M97I5276N104M290I3527N128M473N136M178N469M6019N145M25S 1729414-1734689,1734794-1738320,1738449-1738921,1739058-1739235,1739705-1745723 0 megadepth-1.2.0/tests/long_reads.sam000066400000000000000000000574121420302544700174430ustar00rootroot00000000000000@SQ SN:chr1 LN:0 m64047_190928_230921/118098529/ccs 16 chr1 1727385 60 114S423M1D677M1D321M1D72M1D581M2746N270M1D44M2166N64M1D81M2814N160M511N128M473N136M178N411M1D57M4546N418M1055N73M38S * 0 0 ACACACAGACTGTGAGAAGCAGTGGTATCAACGCAGAGTACTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTCCATATTTAACATTTATTTTACTTTGCTGAGCAAGAATCATAGACAGCTACTACCACGGCTGCTTCGTTTGGACAAAAATAACCAGGAGGCATCCACGGGATTAGTTACACGGTATCAACTTACCACCACAGCAGAATCAACAGTGACTCGCTAATTAACAGAACCGTTTGCTAGAAAGCACTAATCTAGTTATATAAATACTGAAATAGGTCACATGCAAAACACTATAAACGTTTTGTGTGATGTACTTTTAGTTCTCCATAGTTTTGTTTGGTATAAAGGAAATATAATTTGGCTGTGACGTAGACTGTTGATGTAATTTTCAAGTTTTCCTGTATGGGGAAAGTTGCCCTGACTGTGGCCCTTTTCAAGGTGGAGCCTCCAACACCACGTTGGCAGATTCAGACTCCGTGAACAGTCTAATGAGCAAGTCAGCTGAATGCCACTTTCAGATGGAAGGGAAATGAGATGGAAAACAACAAAAAAGGACTGCCAGGCGGAACAGTTTCCAACCGAGTTTTCGTTGAGTGAGGATCCAGCAGCCATCAAACTCAAACATAGGGGCCCGCAGGGAAACTGGAGGAAATACTTCAGAGACAGCAAAGCTGAAGGTTTCTGTGCTCTGAGGGATCCGAGAGTGGATGTCCCACTCCTGTATCCTCAGCCAGACACAGAACTAGCCAGATTCATTAGGGAAGCTCGGATGCTCTCATTCTATAAAGTAAGGCCCCCAGCAGGGCACGTACATACATTGGGAAAGAAGAAACCCCTTAGTACCATGTTGGTCAAAGGCAAGAGAGAGAATTCTATTTCCATCTGGAATGTCATTCTTGTTTACTTCTTCCAACAGTGAAATACTTCCAGGCCTTCGAAAGGCCATCCTTTGGACACATGTAAAAAGCTGTCTTGTTGGCCCGTTATTCCCACTGACCCGTCTGAGTGATCACCCAGGAGCGCGGCGGCAGCAAGCAGAGCTCACCGGATTTGGGACAAGGATTTTAAAGGCAGCTACAAAGCTGAGCTCTATTTGCTGATGATAGTCTCTGTTCAGCTGTTTAAAATGACTGTCTGACTCACCATGGTAATTTTTCACAAATTAAAACACATTTTGGGTTGTGCAACAGTGTTCTCATCTTTCCAGGCAGGCAGATTATTTTAATGCTGTTATACAGGGAATTGGGACTCT
CGGATTTTCTTTTTTAACCTTTTTATGCCATTCAGTAGGGGAAGTTTCCTTGAAAGTTAGAGAGCTGCAAATCTCTAAGTATCAATGTAAAGAAGCAGATGACCCAATTCGGAAGGTGGTTCAAGTGTTCTGTTTGTTTACAAAGGCACAGACCACGACCATGGACACACCCAGTGGAAGTAACCACACCCGGTGTGTTCCTAGAAGCTCATCTGTGACAGTTCAACAAGAACTACTATTCTAGAAAAGTATTACACAAAGTTATTTAAAAAAATGTCTGTACAATCGTTAACACGGCCAAGCCAGCCTTGGGTTTTGCCTCTTGGTGCCCGGCTGTGCTGGGAATGCCATGAAGACCAGCGGCTGGAAACCGACTTGGGCATGGAGAGGAGACTGAGGGAGAGGGAGGGGACAGCACGACTGAGCAAGGGCACAGTGCTGGCTGCCTCATGGGCTCCAGGCTCCTTCTGCCAGGATGAGGAAGAGGCCCCAGAGCAGCGTTACACAGGAAATCAACCTATTTGCTAATCCTTTGGAAAAACGTTTGTTTCTGGTCCACAAACAGAAAATCCAAACAGGATGGCAGCTCCTTGTGAGGGTGGAGGGGAGGGCACCAGATGCTGTGCGGCTGGAAATTCCAAGGTGCTCAGAACCAGGCGCCTGCACCTCTCCTTATGCCAGACCACAATCTTCAAAGAGGCCGGCAGCCACATTCTCGACGGGGAGGTGGACAAGGCCACCCTGGGAGTTGCTTTCAATCTGTCCTCACAAATCAACAACTCCCCGCCACCTCCAGGGCATTTTCTAATAGTGTTTGTTTTTGAGACGGAATCTTGCCCTGTTGCCCAGGCTGGAGTGCAATGGCACGATCTTGGCTCACTGCAACTCCAGCCTGGGTCACAGAGCAAGACTCTGTCTCAAAAAAAAAAAAAAAGTTTTTTTTTTTTGAACCACTGCTAACAATCACTAATGTTCACTAAAACACTAGGCTTCAGGAGCATTTGGAAATAATTCCTGACCGCACAAAGAAACATGCTGGTGAGAGACAGTGACCAAGCCCAGGAGACCACCATGAGTTCCAGAAAAAGTCAGAGAGAGCAGCCACTTTTCCTGTCTCGGAGATACCTCCTACTCAGGACGCAGGAAGCATGGGGCAGGCAGCGCCATGGACAAGGCGACTCGGTGCAGGGCCTGCGGGACCTGAAAGTCACCGGGGAGATTTTCCCCATGAGGGCGTACGCCGTGACGCTCTGAAGGTGGAACAGACTCCGTCTGTCAGAAGCAGCAGCACCACGTCCTGGTTGTAGCTGAAGCTCTTCCCACTCCTCCCGATCACTGGGACGTCCTCGGCCTTAAGATCTTGTCGTTTCTTAAATAAGCGAACACGTGTGCACCCCCCTACTCCGTTCAAGATGCCGCGCTCTGTGGGCGCCTCTGCTCCTCGCTGGTTTTCATGCAGCCACACTGGGTACGCGACATGGGGCCGACATGTCACTGGAAATCGCCTGTGTACTCCCCCAGAATCATCCGAGACATGATCACCGTGAAGATGGGGGCGGAGCTCTTCACCGTCTCAGCAAACGAAACCGCCACATTTTTCAGGCTGACCAAACCCAAAACCACAGTTGCAAACCTCATCAGACCCACAAACAGCATCGTCATAAGGAAGTTGGGTGGGTAGGAAAGCCGGGCCTTGTGCTGATATAAACAGCAAGGAACGAGGGTTTTCACACACCCGATAACCGTGGTGGACAGCATCTGCACCGCACCTAGCATGCTGGGCTCGCCTCCCAGCAGGGACAGGATGTACTTGTTGAGGAAGAGCGTGCAGAAGCTGAAGAAGAACCACAGCGTGAGGTAGAGCAGCGCCCGCGAGCTCCACACACCCAAGTCTGACTCGATGACCGTGGTCTCCGTGATGGTGACGGTCAGTACGTTCTCATCTGTGCCGCCGTCGCTCTTGGCAAAAACAATCTTCTCACTTCGGTGACCAAACAGAG
AGCCCCAGCTGAGAGGCGACCTGCCTTTCGGCTTCTCTTCGGAGCCAGGAACGAGCTCTTCCAGTGCTGGGGTTTTCACCGAGGACGACATGCTGAAGCCACAGCCACGAACGATTTTACCTCCAGGCTGGGCAGCATGGGTCACCGTGACCGCCCGGGGGTGGGGCCGCAGCAGGGACTCGGGCGCCAGGAACGAGGCCACCAGGGCCTCTCCCAGGCAAAGCGTAGAAGCAGACGCACCTCAAAACAACAAATGCTTTTGATGTTGGCTCCTGTGGGGCAGCAATCTGCGGGAAGCTTAGCCAGGCACCTCTGGCTTAAGGTCCCTCCTGAAGCTGCAGTCACACCATGGACCAGGGCTGTGACCTCATCCGAAGGCTCAACTGGGGCTGAGGCCCACCTCTGAGCTCACTCAGGTGGACGCTGGCTGGGTTCAGTTCCTTGCTGGCTATAGGTGGAAAGGGCCCCCACCAGTTTCTTGCCAGTTTCTCCACAGGACGCCCCACAGCCTGACAGGAGCTTTCATCCAGCAAGCTCATCAGGGAGTGGGAGAGAGCAGCCAGGACAGGAGCCCAGGCCTTTCTGAACCTCATCTCAGAAGTGACATCCTTCCCTTCTGCTGTCTGGGCACAGCTCCCTGGGTGGAGCCCGAGGACGAGAGCCTGATGCAGTCTCCGCCGCAGGCATAGCGCTAGGCCCCGGCGCCTTCACAACAAAGGGACGCTGGCCCCAACCCTGCGACTTCATTGCCTCACAGTCTGTGTGT bMh\e\clrhqltmti~>UNidTrJqttdx=ammplskswQSjaZ]dfekruuuuuuuuuuuuuuuuuuutm'P\uutolntsmhf`VLFKZepuvvvvunostqnuvvvuiqu%^LV[<5].~IaUh~GQT|r=]_efMlPqlrjl~di~Pimoirbijqlluwhu_f0gk~]iqmi~bmdr~SoI]dl|nu>i~VzXi~5aQ@mqt~Ztk~~Hg[AjYk*fieVvI;9]Tv2^qM[nEs~\skqjmcj~`rY~`ki[[]FX?ZljtzNzZtHknMf?xCl~~Sciga6foKalNht~a]RQZJ^Dmpn^eqFk]almyk@RZ~ZvSnkpfmj~~~Jinhutq~~Phj~irOktldmsplwwh{~~D\[m)OmzSptte~~~Iwv~6~CFnm~~V~V~~Fjz~~`p~B~Ar~thcjnBR]Hqq~v~ZesPge~St~~Sv~pq~~~D_={qscTb~~@~~Ut~a[~~Uxc~q~qt~Q~fB~~~6b~p~g}~Vfy~[~~b~hvv~c~u{~a}XvppwnPw|O~p~~W{u|~,e{zxYcz(c^_wc~]m~]xyt~}~Jg}~N{o~~8s}aoW~J~T~~O~~Gt|{yge~T~~~:gnTfc~~~~'|Cjkej~Qy~_T~Uq%b}s~~V~[~Tq\}~xy~x6s{sMm}o[v~~ftk~Oyyv{y~]myj|~GQyWn~Ipln]~~~Ry~E|u}~b9i~?`~~V~~V~~Vyds~kp\^fy~uq[v~~Myt~mWRlFxv9Ye^fl^ir~}~s~I~~~.}~qsx~~dhtum~~:}m~~al{~~z~a~odw~JwXjHfJ^b~Y^Tsy~d}yIbFu|~qyy~Gd3npmu~Uy~}qzt~v_~Ur~}~|zQwz~h~h~~~~=c{p}~~Nu~mRHntrinsbwY~~Wo~5`~hy~~FGo~)~e~scN~JZaf~clKRprlG~RqsDyuylnty~k~bklcw~_~Mx~ry~G~Xq|~u;~w~yr}jg~~Hmo~}~us_j~~}~W|rnpz~}~~~x~Uko}yzivtx_v]k~Eu~~u~~`imOIppfmguat@be~b~~FmM~`~a~;p~q|mWw~b~a}~d;|~^v_vz~HbaZ~V~W~s|~d|pi~Wvi~V]`~[Zv~Pv~vJ~Q~GncR~_~uo~\r~v_;b~QXEv~QlzSkn~b~RV'\Z~=~~~7nv~h}q~~I\{c~qw{~~@_^rS~~Jdu~aY~Zkrx~~,{~Qd~[~~Mc~~[kHY~~TZ}~gzacNkpvOjtwz~4s~Fln~g~m~Vo[z~~Aij~Uy~
~V_{du{~^uqhwY~i}~auvt}~~r~]~Xr~uwR~}p~H.}~Ov{~`~~~O~vtt~z~yq~^_~q~`_x~TWdfht~~~~~~~~~~~*pwymos{}|ui_0p~Y|Vsk~}r\tUatOrv~q~~ffu~iu}ev~~~L\]j}h~ds~K\x~9~yvx~~QyQ~~My~XlW~D~}lzDyv~u~~My~~[tbX|r~~i|c~y~y~u~wwJl~^~nx~~Ny~V}_}~Ax~Sofw~z~H~b}y~~~~,n~wqOg_blzxv~y~Zsg~~~T~WVop`jf~g~ymRs~T~~>~~par{~TsB{s~~T~[\v}z~~~F]^~5vpxK|~X{~~e~u~b~cmaYjg\u=mcmd~~U~`~xw~~<~|@{i~~Jt~qg~_~~~G}yrw~~Tq~~8u~xe~~\wrn}ok~ZPCC~u{^\qw}~`~Tr`0wimcB~h~~PHvr~{~Ybo~ZuvxYi{vofuM\s{~~a}~\z^{z~ttfw~Usr~r~LX~.ehc~Mj~~Q}~~niil~~n~R~~~~~T~~`~~~`~d~~~~G~~g]z\~_]~~Kk~~~~~G~~}z~z~~~~~~~Z~Yw~~_~d~c~~U~~~\~~~}GRxZoRxk~~~9v~bng~Zdoi@~~t~Qm~}~Hp~~\~~H~~~_~~~~~~~n~~~}~ol~U~kRtc~W{u~~~E~mm~~m~~3z~Y~~}~~~~~rwb~~~~m~~Z~~~~hsg~~~~asC~~~~r~m~`~~f~Rwy~~{~g~~`~~S~~Z~~~~G~~_uz~}Hh~ys~[~~~~eP~y8t~~~ESy~~~~2||eD~fofy~~~N~~)W~Qb8IuO~~~~{~~_~~~~~~~aWlwyvMopo[wnj~~~o~~P~vf~`v~~~U~~P~R~~O~y~hB~_~m~~~~uy~~e~~~~{ex~i~q~~Y~\w~~~~g~~~~~\~~~~_~~~~H~~~Rx~I~~~}>~~~Y~~~~o~~~o~~~q~~~^nlx~Ul~Q~zy~[qod~~~~~|v~Oy~~<~pq~~`~x~h~~Oj~~N~~Yvh~d~~`~~~F~_~yF~~~~~x~[f~~{~~[~r~~^pzj[|~u~u~ NM:i:27 ms:i:3767 AS:i:3562 nn:i:0 ts:A:+ tp:A:P cm:i:1161 s1:i:3629 s2:i:3284 de:f:0.0069 rl:i:205 m64047_190928_230921/123078214/ccs 16 chr1 1727385 50 
83S16M2I27M2I9M1I12M2I4M1I12M2I6M1I12M1I8M1I12M1I6M1I22M2I12M1I5M1I5M1I8M1I5M1I10M1I29M1I10M1I5M1I8M1I7M1I16M2I8M1I21M1I34M1I15M2I6M3I7M1I6M3I13M1I7M1I13M2I10M1I9M1I26M1I16M1I7M2I11M2I14M2I14M1I8M2I9M1I29M1I7M1I11M2I6M1I8M1I34M1I27M1I6M2I13M1I10M1I6M1I21M1I4M1I37M1I8M1I22M1I17M1I35M1I6M2I39M1I13M2I19M1I16M1I11M1I8M1I6M2I17M1I4M1I29M1I53M1I4M1I14M2I28M1I11M1I15M2I16M1I8M1I10M1I20M1I7M1I23M2I11M1I8M1I10M1I31M1I20M1I6M2I8M1I12M1I8M1I5M1I5M1I15M1I5M1I4M2I6M1I7M1I6M1I6M1I10M3I8M2I16M2I15M1I15M1I7M1I16M2I21M1I7M1I19M1I11M1I5M1I18M1I20M1I7M1I6M2I11M1I6M1I9M2I12M1I10M1I9M1I8M1I9M1I5M1I4M1I10M1I14M2I22M1I5M1I22M1I10M2I11M2I8M1I7M1I9M1I7M1I15M2I8M2I7M2I10M1I7M2I22M2I20M1I9M1I12M2I7M1I15M2I8M1I8M2I7M1I9M1I11M1I22M3I12M1I3M1I16M2I11M1I9M1I14M2469N9M5I26M2I12M1I4M1I6M1I24M1I6M2I4M1I20M2I7M1I13M2I17M3I11M1I9M1I9M2I16M1I8M1I6M1I3M1I5M2I14M1I51M3I17M1I5M3I16M1I6M2I9M2I13M3I28M3I17M1I14M1I11M1I14M1I8M1I7M1I17M1I8M4I7M2I7M1I8M2I11M1I6M1I19M2I4M1I21M2I9M1I15M1I18M1I6M1I11M1I3M1I8M1I5M2166N23M1I8M1I5M1I8M2I22M3I31M1I5M1I11M1I6M1I8M2I11M2I8M366N12M1I16M1I7M1I5M1I9M1I4M1I24M1I5M1I17M1I4M1I6M1I14M2I12M1I33M2I5M1I7M2I4M1I5M1I16M1I3M3I12M1I4M1I22M1I19M1I9M2I25M1I7M1I4M1I9M1I18M2I7M1I14M1I21M2I9M1I7M1I7M1I18M2I12M1I12M2I4M1I20M2I31M1I6M2I12M1I11M2I15M1I7M1I17M2I21M3I8M1I10M1I10M1I18M1I7M1I23M1I8M2I16M1I5M1I17M1I12M1I23M2I6M1I5M1I6M2I18M1I8M1I20M1I16M3I9M1I15M1I14M1D5M5I8M1I18M597N25M1I6M1I7M1I8M1I10M1I10M1I11M1I10M2I6M1I13M1I3M2I27M1I18M4I4M2I16M1I34M1I20M2I13M2I35M1I15M2I7M1I24M2I8M3I8M2I9M2I9M1I7M2I20M1I14M2I26M3I22M1I7M1I4M1I15M1I7M2I10M1I6M3I33M1I27M2I5M1I14M1I16M1I17M2I20M1I11M1I8M2I16M3I16M1I9M1I9M1I6M1I4M1I6M2I6M1I14M1I7M1I12M1I8M3I22M2I8M2I21M3I14M1I3M1I13M1I5M1I17M1I8M1I4M1I6M1I5M1I6M2I13M1I9M1I10M1I4M2I11M2I18M2I7M1I22M1I9M1I7M1I10M3I20M2I11M2I2M2I17M1I10M2I16M2I9M2I2M1I11M1I9M511N1I9M1I17M1I14M1I6M2I9M1I12M1I11M1I6M1I5M1I7M1I14M2I14M1I4M473N1M3D5M1I22M1I10M2I8M1I9M2I25M1I8M2I7M1I3M1I6M1I6M1I5M2I7M1I3M1I8M178N10M1I14M1I3M1I17M1I14M1I16M1I13M1I12M1I11M2I24M2I5M1I8M1I7M1I6M1I7M1I14M2
I5M1I10M2I37M2I14M2I7M1I9M1I10M457I6400N6M3I5M1I8M1I8M1I5M2I14M1I10M1I6M1I5M1I17M116S * 0 0 TACCACACAGACTGTGAGAAGCAGTTGGTATCAACGCAGAGTACTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTCCATATTTAACATTTTATTTTTACTTTGCTGAGCAAGAATCATAGGGACAGCTACTTACCACGGCTGCCTTTCGTTTTGGACAAAAAATAAACCAGGGAGGCATCCACGGGGATTAGTTTACACGGTATCAAACTTACCCACCACAGCAGAATCAACAGTGGGACTCGCTAATTAAACAGAAACCGTTTTGCTAGAAAAGCAACTAATCTAGTTTATATAAATACTGAAATAGGTCACATGCAAAAACACTATAAAACGTTTTTGTGTGGATGTACTTTTTAGTTCTCCATAGTTTTTTGTTTGGGTATAAAGGAAATATAATTTGGGCTGTGACGTAGACTGTTGATGTAATTTTCAAGTTTTTCCTGTATGGGGGAAAAGTTTTGCCCCTGACCTGTGGGCCCCTTTTTCAAGGTGGGAGCCTCCCAACACCACGTTTGGGCAGATTCAGGACTCCGTGAAACAGTCTAAATGAGCAAGTCAGCTGAAATGCCACTTTCAGATGGGAAGGGGAAAATGAGATGGAAAAAACAACAAAAAAAGGGACTGCCAGGCGGAAACAGTTTTCCAAACCGAGTTTTTCGTTGAGTGAGGATCCAGCAGCCATCCAAACTCAAAACATAGGGGGCCCCGCAGGGGAAACTGGGAGGAAATACTTCAGAGACAGCAAAGCTGAAGGTTTTCTGTGCTCTGAGGGATCCGAGAGTGGGATGTCCCCCACTCCTGTATCCCTCAGCCAGAACACAGAAACTAGCCAGATTCATTAGGGAAAGCTTCAGATGCTCTCATTCTATAAAGTAAGGCCCCCAGCAGGGGCACGTAACATACATTGGGAAAGAAGAAACCCCCTTAGTACCATGTTGGGTCAAAGGCAAGAGAGAGAATTCTATTTCCATCTGGGAATGGTCCATTCTTGTTTACTTCTTCCAACAGTGAAATACTTCCAGGGCCTTCGAAAGGGCCCATCCTTTGGACACATGTAAAAAAGCTGTCTTGTTGGGCCCGTTATTCCCCACTGACCCCGTCCTGGAGTGATCACCCAGGAGGCGCGGGCGGCAGCAAGCAGAGCTCACCGGATTTGGGGACAAGGATTTTAAAGGCAGCTACAAAGCTGAGCTCTATTTGCTGATGATAGGTCTCCTGTTCAGCTGTTTTAAAAATGACTGTCTGACTCACCATGGTAATTTTTTCACAAACTAAAAAACACATTTTTGGGGTTGTGCAACAGTGTTTCTCATCTTTTCCAGGCAGGGCAGATTATTTTAATGCTGTTTATACAGGGGAATTGGGACTCTCGGATTTTTCTTTTTTTAACCTTTTTTATGCCCTTTCAGTAGGGGGAAGTTTCCTTGAAAGTTAGAGAGCTGCAAAATCTCTAAGTATCAATGTAAAAGAAAGCCCGATGACCCCAATTCGGAAGGGTGGTTCAAAGTGTTTCTGTTTTGTTTACAAAGGCAACAGACCCACGGGACCATGGGACACACCCCAGTGGGAAGTAAACCACACCCCGGGTTGTGTTTCCCTAGAAGCTCATCTGGTGGACAGTTCAACAAGAAACTTACTATTCTAGAAAAAGTATTTACACAAAGTTATTTTAAAAAAAATGTCTGTACAATCGTTTAACACGGGCCAAGCCAGGCCTTGGGTTTTTGCCTCTTGGGTGTCCCAGCTGTGCTGGGAATGCCCATGAAGACCAGCGGCTGGAAAACTGACCTTGGGGCCATGGAGAAGAGGACTGAGGGGAGAGGGGAGGGGGACAGCACGGACTGAGCAAGGGGCACAGTGGCTGGCTGCCCTCATGGGCCTCCAGGGCTCCCTTCTGC
CAGGGATGAGGAAGAGGCCCCCCAGAGCAGCGTTACACAGGAAAATCAAACCTATTTGCTAATCCTTTGGAAAAAACGTTTTGTTTTCTGGTCCCACCAAACAGAAAAATCCAAAACAGGATGGGCAGCTCCCTTGTGAGGGTGGAGGGGGGAGGGGCCACCAGAATTCCTGTGCGGCCTGGAAAATTCCCAAGGTGCTCAGAACCAGGGCCGCCTGCACCTCTCCTTATGCCCAGACCACAAATCTTCAAAGAGGGGCCGGCAAGCCATATTCTCGATGGGGGGAGGTGGGACAAGGGCCCACCCTGGGGAGTTGCTTTTCAATCTGTCCCTCACAAATCAACAACTCCCCCCGCCCACCTCCAGAGGCATTTTTCTAATAGTGTTTTGTTTTTTGAGACGGGAATCTCGCCCCTGTTGCCCAGGCCTGGCTCCAAAAAAAAAAAAAAAAAAAAAAAAGGAAAAAATAAATTGGGCTGAGGTGTGGGTGGCTTCATAATTGCAGTCCTAGCGCTTTGGGGAGGGCCCAAGGGTGGGTGGATCACTTGAGGGCCCAGGAGTTTCAAGATCAGCCCTGGGACAACACAGCAAAAAATCCCCATCTCTCAAAAAAAAGTAAAAATAAAAATAAAAATAACAAAAAACAAAAAATTAGGCCCAGCCGTGGGTGGGTGGGACACCTGTAGTCCCCAGCTACTCAGGAGGCTGAGGCGAGAGGATCACTTGAGCAGGGAGGCGGGGAGGGTTGCAGTGAGCTGAGGATGGCACCACCACCACACTCCAGCCCTGGGGTCCACAGAGCCAAGGACTCTGTCTCCAAAAAAAAAAAAAAAAAGTTTTTTTTTTTTTTGAAACCACTGCTAACAATCCACTAATGTTCACTAAAAACACTAGGCCTTCAGGAGCATTTGGGAAATAATTTCCTGACCCGCACAAAGAAACATGCCTGGTGAGGAGAGGACAGTGGGACCAAGCCCCAGGAGGACCCACCATGAGTTTCCAGAAAAAAGTGAGAGAGAGCGGGCCCACTTTTTCCTGTCTCGGAGATACCCTCCCTACTCAGGGACGCAGGAAGCATGGGGGCAGGGCAGCGCCATGGGACAAGGGCGACTCGGTGGCAGGGGCCTGCGGGGACCTGAAAGTCACCGGGGAGATTTTCCCCCATGAGGGGCGTTACGCCGTGGGACGCTCTGAAGGTGGAACAGGGAACCTCCGTCTGTCAGAAGCAGCAGCACCACGTCCCTGGTTTGTAGCTGAAGGCTCTTCCCCGCTCCCTCCCCGATCACTGGGGGACGTCCCCAGGCATGGTGGGTGGGCGCCTGTAATCCCCAGCTAACTCAGGGAGGCTGAGGGCAGGGACAATCACTTGAACCCGGAGGCGGGAGGTTTGCAGTCAGCTGAGATGGGCACCCACTGTTACCCCAGCCTGGGGCGGACAAGACAGAGGACTTCTCTGGGCTGCCAGAGGCTCCGGAAGCCCGGGGTGCCCTCAGGGCCCGTGGGCAGTTTCCGTCACTCTCCAACCGCCCTCCCCCCCACAGACCTTCTTTTTTGCTAAATGGTATCAAGATTTTTCTCGTTGTTGTCAGCAAAGAGAGTTTGGTTTTTCTAACATCTCATCGACCATGGGCTGGAGGGTCAAAATCGATGTTTTTAAACTTGCTGGAAAATAAAACGGTTTCCTTTCTTGCATGGGCTCGATGAGCAATAAGGTTTCCCTCTGTGTCCATTTTGTTTTACGATTTTTTAGGATTGCTTTTTTAAAAGCCGGACACGGGCGGCTGATGGGCTTGTAAATCCCAGCACTTTAGGAGGGCCCGAGGCAGGAGGATCACTTGAGATAAGGAGTTTCAGGGACCCAGCATGGGCAAACACAGCGAGGACCCCCATCTCTATAGAAAAACACAAAAAATGAGGCTGGGGGGTTGCTCATACACGCACTGAGGGGTTTGGACCCTGGGTGTTCTTGCCCTTCTTAGATTTCTTCTGGAGCTGGAGAT
TGAACTCGGGACCTCAAGGCCCAGCTCAGGGAAACTGATTTATTTACAGCAGCGAGGGAAAACTGGAAGTTGGTGGTGGTGAGGAAAGTTATCATGGGGCAGGTACAGCAGCTCACGCCCTGGGAATCCCCAGCCACTGTGGGGGGAGCTGAGGCGGGAAGGATCACGAGGGTCAGGAGCTCAAGACCAGCCGTGGCCAACATGGCGGAAAACCCCCGTCCTCTACTAAAAATACAAAAAATCAGCTGGGGCGGTGGGTGGGCACGTGCCCTGTAATCCTAGCTACTTGGGGAGGCTGAGGCAGGAGAATCGTTTTGCACCCTGGGAGGGCAGAGGTTTGCAGTGAGCCCAATACTGAGGCCACTGAACTCCCAGCCTGGGGCGGACAAGGGCGACACTCTGTCCTCAAAAAAAAAAAAAAAAAAAAGAAAGAAAGAAAAGAAAAAAAAAAAAAAAAAGGGAAAGTTTATCATAATCTTTGTTTCTTCTTCTGCCACTGAAATAATTCCAGAACATCCCAAGTCTGGCTAGTACGTGGAATTTGGAGAAAAAAGGTTCACTGGTAAGTATGTCGTCTATTGCTTAGAGGAAAAATTCTGCCTAAAACCAAACTCAAAAAAAGGTGTACAAAAAATAAGCAAAAAAGCGTCCCCCCAGGATTCCCACGCTCCGGGACAGCTGCGGCACGTCCGCCAATCCTCGAGGACTCGGTCCTTCCCAGGTGAAAATTGGTGGGCAGGAGAATCCGCGTGAAAATGGGGACACAGCAGCTCACAAGGGTTTCATTCGGGACAAAAGCCCAGCAGAACAAACGTGGGAAACGCAAGGTCCCAAACTTTTTTTTCTGGTATCGATAAGAAGCTCACAGCCAAGGGCTGTTAATTCTGAATCCCCAGAGTTTCCCCAGTTTTCAATTGTTAAGAAAAAATGACTAACGTATTACTCACAGTGAAAAAAACAAAAAAGACACACAAAATACAAAAAATTAGCTGGGGCATGGGTGGGCACATGCCTGAGGGTCCCCCAGCTACCCAACAGGGCTGAGGTGGGGAGGATTGGCCTAAGGCCTGGGGAGGGCCCGAGGTTTGCAGTGAGTCGTGGACCTCGCCCACTGCACTGCAAGCCTGGGGGCGGACAGAGCAAGACCCTGTCTCAAAAAAACATAAAAAAAAAACAAAAGAAAAAAAAACAAAAAAAAAAACCAGGAGTTTGACTTTTAGGCTTTTGATTTTTGTTATAATCACCTAAAACGTGTGGTGCGGGGTCTCCAAGTGGCATGTGGGGGGCAAGCCCAACCCCCATCCTGGGACCCTCGGCCCTCCTCTCCCGTCCCCCAAAGGGCAGACAGACACTTTCCCCTCGGCCCTTAAGATCTTGTCGTTTCTTAAAATAAGCGAAACACGTGGTGCACCCCCCCACCACTCCGTTCAAGATGCCGGCCGCTCTGTGGGGCCGGCCCTCTGCTCCTCGCTGGTTTTTCATGCAAGCCCACACTGGGTACGCCGGACATGGGGGCCTGGACATGTCACTGGGAAATCGCCCTGTGTACTCCCCCCAGAATCATCCGAGGACATGATCACCGTGGAAGATGGGGGGGCGGAGGCTCTTCACCGTCCTCAGCAAACGAAAACCGCCCACATTTTTTCAGGGCTGACCAAACCCCAAAAACCACAGTTGCAAAACCTCAGACCCCACAAACAGCATCGTCATAAGGGAAGTTGGGGTGGGGTAGGAAAAGCCGGGGCCCTTGTGCTGATATAAACAGCAAGGAAACGAGGGGTTTTTCACAACACCCCGATAAACCGTGGGTGGGAACAGCATCCTGCCACCGCACCTAGCATGCTGGGGCTCGCCTCCCAGGCAGGGGACAGGATGTACTTGTTTGAGGAAGAGCGTGGCAGAAGCTGAAGAAGAAACCACAGCGTGAGGGTAGAGCAGCGCCCCGCGAGCTCCACACACACCCAAGTCTGACTCGATGGACCCGTGGGTCTCCGTTGATG
GTGGACGGTCCAGTACGTTTCTCATCTGTGCCCGCCCGTCCGCTCTTGGCAAAAAAACAATCTTCTCACTTCGGTGACCAAACAGAGAGGCCCCCAGCTGAGAGGGCCGACCTGCCCTTTCGGCTTTCTCTTCGGAGCCAGGAAACCAGCTCTTTCCAGTGCTGGGGGTTTTTCACCGAGGGACGGACCATGCCTGGAAGCCACAGCCCACGAACGATTTTTACCTCCAGGCTGGGGCAGCATGGGTCCACCGTGGACCCGGCCCCGGGGGGGTGGGGGGCCGCAGCAGGGGACTCCCGGGGGCGCCCAGGAACGAGGGCCCACCCAGGGGCCCTCCTCCCCAGGGCAAAAGCGTAGAAGGCAGGACGCGAGAGCCCTGATGCAGGTCCTCCCGCCCGCAGGCATAGCGCTAGGGCCCCCGGGCGCCCCTTCACAAACAAAGGGGACCGGCTGGGCGGGCGGGGGGGCCTGAGAGGGCGCCCGCGGTGGAGGGGGCCCGGGGCCGCGAGGCCCGCGGGAGACAGCTCGGGAGCTCGGGCCACTGGGGGAGTGGGCACAGCCGCTGGGCGGGGAATCCAGGTGGGGCTTCACGGGGGCGGCCCCCGCGGGGACCGGAAAATGACGCCGCAGAAAACCCTGCATCGGGCCTCCTCGCTGCCCCCGCGGGGCGCCCGCTCCTCAGTGCCCCCAGAGCCCACGGGAGCCGGGGGAAACCGCGCCCCGGCGGGCCCCCACAACGCCCCCCCCGCGGCTTGCCCCGTCCCCCCAACCCCTGCGACTTCATTTGCCTCACAGTCTGTGTGT *M:WNb`oZjtp~}~Ql_~I~~~~~6~c~~~~~\,We_L~~l~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~)~`~~~~~~~3r~~~~Q~~~~~<~q~~Bk~~j~Tg~~n~Uu~~~~~~$~T~~{+~~~My~*B~~IOtP_/~~TB~~~~$~H?o~~~~~Vx~~/~U~~~.~~4~j|~9~~~~~3~~B~~~~P~~~~~cx8t9<~ED~Ra~~2~~n~~h~t~~~@~Y~WUi~~~~2~s~t~~~~.~m~~GUr~~~G~G~~~~D~~~~~~~~*~M*3~~~L~6~~~~h(~~@s~~8u~~~f~~N~~~@1y~}~~h~~~~~/~~~J~~~~~-e~~~~aL^Xe|.%~g~~~~~~~~H~~~c~~~S~~~~~~~~~4h~~/5~,d.n~~[~~~~G=tc~~~~j~~M)A~~9~u~angg[~C~A{~~~~~i~~~D~~~~~~~~:~_~~~~~~~~~(+~~'yZ~~$~~~~ME.~~Ly~~~~)N~~H~~~~Zv~e~^~~~%~<~'mi~;~M4~~WLRoO~$~~BfP~~~+~tm,^\r~3~~~z~(e~~~<~~~B~M~~~~UnTR~~~~d~~O=E~~~~~~Ut~~4XN~&~x~~~1Y~~)~k.~m~~:9~~~~(~~f~~~~~~~-j~.~x~g~B~~l~~@~~o~~}~~~L~l]~*~@y~Zc~~~NdS~AVNaoC~~.~~~&~~x2~~L~JH)~~G~~~{~~+p~~~~~~~4{~~4~^EM~~,~~@}~~~6~~M~~$H~~~CRh^~9~VwJ~~~/c~~u~~~A~~~-M~{~Shy_~~~~~/.V~^fq2^WRh~>~R_2S~~~$~oU~3~~qY{~~1~N\~~%~BD%IX~~E~~O~~nB~q~~@zgF~~~~~~?D~(`~~%~}2~d~+~6~t~~_p~Uc~~EWO~/~+~~~~(~~~~~~~2)V~~^R%~hM~~x~k~~)~~e~~Tn~~*1~~~>~~~}|a~J~~E~f~~J~%~~U~H?~:Y~~~l~:~p~GS~I~~C~;~u~~~~+~fqh=~~3~~a~~Ui~~0~~~~~~G~n~N~~~~K~~)E~~~F~Z~~~,~M~~@~~~TR~~~~g%fSI%S(~G%~~~C~v~}~~~~~,M~~~)~K~JG(=G~]m~H(m~6~t~?G~Hr~~~~K8xY~~EE~~~~~~Or~~~~~~~'~~~@~4pM+r~A{~~~Zv~P~~~~~jv~~0~x~~=s~~~1IQ~E~4~~~~6~4`j~q~~~$~~~
j~~~~A~9~e~~[k[~Y~R~~~~~~~OBY~~~~~:~V~g~~~~~;n~~~~;~~%~~~~~~~~~(~X*~~1~W~gP~~R~~9bu~7~~R\[~~~~7~h~S~~x~~~v~~~~~_]~~;}|~$;:~$~bDOPm~~B~e~0~~A~l~_:`~L~M~b~.q8d~~?~n~~~~9f~~~~d~~>`~]~;\~$6~~N~~~(~m-~~M~aQ~~%~4~~~+~U~{l~N$`L~h~~~~;~i~~~~/~~~~~5u;~~[I~~(YM~~v%~~/0~~&Fi~~s~~E_~~9~S&Z~YIY~~~~M~^^~V~~~,i{~~>~vqvt~j~~~1~~~~~1~fE~~C_~~~~=~rpZeG{~*~C`LQe~~sx~~%~~~~=~D~~~~r~>~~~~~M~~h~KU~~~~,~~~~~Ks~~~:f~~\~~~3~o)~~;~~=T~~~>~~ku~~E~~~E~~g~*~OIi]a~)~~GC;~~~)~~;~~~f{~-Xg~~4X;^~w~~~(~\Z(~~eK~~NmC~~X~~~b~QV~*~~~X|Qe~v~g~m~w~~~/~L~~(~-Te~2~~~~$~u~V~~~+~Y~~IM0@~B`6~{~~~~l~a]~.~U~8R4+9~:^+P~5;~w~~[^~~~~'~~1~~~BD~~p~~.~~&f~~-~~~~:~b~cZ~~~~=a~_~f~~L~~'~6~~~~Hrz~Ui~:H~~~~~~4b~~'~~a~~(~bw}8~F:~~~/~~~~~~~~~~~~/n~~~~~Ae~8~~~~(~'W~[,~~~~D~r~5@~~V~~VsAp~aSR~_h~~~~~~~~~~~~~~~~~~~~~~)~$H~~~~;~~~4~\~~5(~g~~2~~~~~4j~NSlA~~~~:~$~~~z~~a~~z~~~~~.-~~)~~~;~~F~f~~@~q~G~~*\Ou~~~|~~~~(~~C~~K~~~~Ux~~p~~~~~~~8~~~NVc~4^~~~Y~~~~~~/^e~~):.g~m~ae~~~~~~2~~~~~~'~~~~~-~~~~~8n~ar~~~~~,~~~~~~%~v?B*~~^~~~)~s~~-^q~H~~~8~[~~n~~r~~~~~~0q~jtN~{u~~6~~U~~y~~H}I~b~~.~lr^]~nI~k~~~~]~~;~~~~)~~~*~=rZ~~~m~4~QC~~;{n~$uR~?~~N;~@~d^0z~'~UX~*~~~~:B~S~~~~p~~B~\V(hA~~~~~.~~,*X~~~~~~~~~~~~~~*_v~~~~~~~~~~~~$kj~$~D~~~~~~~O~~NYi5_~~~[Zl~M@~~~~~~~7^~p~~~@S5~O~~~=~~z~~~ge~1~~-~~~h~E~,~j}~~%~_s~~~+l~~&anPrt&~~]~Kqw.~Jg~;~&}~gw~S}~Y~x>j~~;~~6~o2jk~<~~V*%8~lZ~&~Q~}~~~~~:~l_y4~=~,~/~~~.k~%y~~~~~S~RnJ~)}o~O~~sY]M~8[_~'~~U~~~~~*~W~~~~)~S=a~~|~~~(>s~~4~~Le1~Z~~~~E{U~io~)fPSM|k~MGEG~~~~~/~8gg+.~~&W~$~\~~J,~vJ~/~~~&~~~~~~)W~~~KZE`~~~~H~~~0~d2~1~~~~'yBl44I~R~k~Z~~_~/~~~~8<(U&~~h~r&~~~A~w~\b|T~[H~%+~26~'NP~&~~;%~%~~~~~Om~hy5T~X~%h~~_~(~~~G`~~~=~~xo~^~~~~~%\wSRQ~~%~~1Z~~~.~~~=~~~T~{~T~~q~t~~~~6~/ihh1~~Jx~~+~~+7~e~~~7~~~~6~~~Nrh~~~3m~0~~A~r~~@~~~'y~_~~0~6Ejk-g~/~[~G~m~~)fPp~5k~Hh~~~s)^j~W~~KM~~U~Fk~r~~~~~,c~2~r~f~oP~~L~}~~Fi~V~~~~~)0~%~~~d~]~~6~M9~L~~~(n~>~ld~~~2~~~]~~~~~Eoer~~~:>~~'~~~,~T~~~~08~'w~K~~~~.~~*w~({\m~~~~~Tt~~Cbi~~~~~~~~ol`ej~%~~~<~~o{~~4qs~z[~2NBK~~~~-~zGwN~~~~~~&~,>8o~jxx~~~~1=3f~.`}~+~~~3t~Q~~~%e~~&~[~$~~{~:y~G~~~.&~~~%5'%/~b~~~~~~~~2~
~~~~~~~1~l~M~~~~~-~ZXK~3~~~:~~~5{}a~~UN~7Wn~~NP~JU~0~r|~~~C~~;~Zsm~~~/VSa~1~H~c~~~~$~~A~N~-4~)ng}Sdo~3~/~~Kp^r~~~~~~~~~~~~~~~~~~1[~~Ci~~qmr~~,Mf~r~~~~~~~~~~~~~/9~,g~*cB~5~z~~~~~~~~~2WR~6~~J^~~~~~~G~~~~~~d~~k~5~;~^~I~~~~~3~~mxc~~FZ~~~~~~~T@*~@~~$~W~~~~~~~5r1~Jwk~~~m~~u~~HFMZ-wt0~l~5j~~c~~~~7H~~p/~O~~~~:~~~~'~;K~E~~~~~~~~~4~`s~~^~~~~~~,~~~~~~~~~r%/5t~~~~~~-~~P~dD~T0[+~D~~Xf~/B~lwz`~(.*\S}U~~B4=,~E~~2~Vp~~CNF~r~(0=-~b~~+~~n;5V~~C~ad/m~~(~~~@j`~3~~6~>z~,7~~.~~~~%9'~laU~~~B`l~~~\~~<<~/y~~~h~~:~z{~~<\~~&~%~~f~V~t~,~~~~~=~~?9~B~B~1e~~:~~IFJ~~~~~x(U9~f~~~~~e/~gd~J~~~[kUn6~+~l~~C~~`~*~a~W~~x~L~~~~,oT~t~~9B~~*~o`~~/U~{~P\~%~~ej~~~~*~-aU~~V~~~~K%mu~>~~~~~hw~~~~~4fT~~~~%&dA~@wJ~~~/~~~~~~~~4~~~~P~~~~-Rx~~~(~~~2a~jVv~~Q~S~~~I^N~~~'yC\]y~~b~T~~~~&=~~~~~?y~u*)~~~k}~~~O^V>~~~-~~T~~~X~^~~F~~~~`~_w:j30%NT~~~4~L~~*~~~~~cYdnt{t~~~~+^~Iq~~e~,^~~~?@u~'~~~.~~~+~r~5F^6~~(^(5~~P~L~~~~O~~0`~X{~~V4d&xN~~~~,~~~*~~~~~~1~~~~'p~i~>;_6~~~^~~~}~~)~~~~~~~~6f~~eUd9~~~gR~~~~~%~~l~|~~~~~Ll~pW`:5~G[5Y1EII~~~~~~~G~_~5\~Ila~~{~~6~a\Dn~\~~~~Zgx~~j77Mb~9~F&y~~~-q~~'54w>L~~~~~~~?T%~~\SkueK`D~U~~~]~~5~J~~~=~f~Y%8}~~~~~~8~}~g~GAi~M~~~3nQV~~&+x~~z~~~5~Pp~~~~~~~~2q~+~y-i~~~G_|~Uo~f)~Q].c~~i~S~~~H~;~~~5~~~~~~~~5o~~~/~e1~~^~~LC~~)~~~~1~6~~~~~*Lr~~~0~SaF|s~~~l(q&~~2fb$)W~~_Z0i~~~~~@~UV~:D~~8~~~~:~~~'4~~1d~\~~~P~~B~meG\?h'~~x~~~%~~~~~$~;=~'~p~~~~9~~~~G*RK6IB`^~~.z~~t~,~h~~~~:n~~0:)K~S~J{~8~~~,~~{~^t~lq~~v~~mV~~~~/&~a~~~~~~?w~Cc~~~~;~~~~-~c~~7~~W~{~+C~~e~?B~,f~~~8~nR~c~u~~~{|~I0^~A~@DfgZiK~F~~~,xaDi0t~`[>~~~/~ZA~Z~U~D427EYt8~~A~~ELu~~~~~h~~~~%~~~<~~~~(&8t~Afa,~3N~I~~<{~~X~~~~$Q~~~~~~R[~E~~Lp~W]-A~~<~~~vok~~3~~P~~R~m~N~~~]~E[~~0Z~~~~~0j~~~~~)~C~~~~~~~~~3|p_~~[a~~~'W8J~3~~*~H~v~~~Gy~K~g~0~~~~4=~,~~7~~~~&~~~82P~~8~;~6It~=wKr~~=f~~N*9L~(C~>vQPM<@~~*~~0em~5z~~.wxn~:,Ssu~EhE~~~~Q~~~~&9~6~TX~~7~~w~|q~0~~~Cn~~Mv~v~3\~~~N~~~+u~~~~~~8~M~sI[\m~7nUU}+`I~e\~$~~{~~&d~Yf~~/~IP{8~~Bcs1fKe~QT,~_M~3:~~~E~P~z~~~,y%e~~~~~~J~~m~~G4zj~~~1kN~~~,yz~~&~F&~?~~Ez~~~C]~lJM,H~~~%&,+h~~~9dq~~~5~~V~Ar~~S~oy~~~B~TC~~~~.~~X~~pop~~~;G/?~X~~~Yyw|~~~'~}
~~~)F*g~WbNysu|~~P)7Q~~~;~~~l~~M~~~~;~F~w~~~~L~~2+2hS7C~Z4i3~~~7P~~o7]D~/~bc~~~~~~'~j~~%LJ~~~~\~~~~~~~)~;3~~'~~~7=_~c]f%~>~~=~:t~~4JQo`Y^=8? NM:i:1144 ms:i:1767 AS:i:1547 nn:i:0 ts:A:+ tp:A:P cm:i:330 s1:i:2133 s2:i:1568 de:f:0.0882 rl:i:634 m64047_190928_230921/125567901/ccs 16 chr1 1727385 0 72S1244M4D17M1I764M97I5276N104M290I3527N128M473N136M178N469M6019N145M25S * 0 0 * * NM:i:416 ms:i:2471 AS:i:2702 nn:i:0 ts:A:+ tp:A:S cm:i:904 s1:i:2850 de:f:0.0093 rl:i:95 megadepth-1.2.0/tests/test.bam000077700000000000000000000000001420302544700216202test_noprefix.bamustar00rootroot00000000000000megadepth-1.2.0/tests/test.bam.bai000077700000000000000000000000001420302544700231042test_noprefix.bam.baiustar00rootroot00000000000000megadepth-1.2.0/tests/test.bam.mosdepth.all.per-base.bw.bg000066400000000000000000000017161420302544700233440ustar00rootroot00000000000000GL000219.1 0 168544 0 GL000219.1 168544 168619 2 GL000219.1 168619 168621 1 GL000219.1 168621 179198 0 chr10 0 4358517 0 chr10 4358517 4358578 1 chr10 4358578 4359039 0 chr10 4359039 4359045 9 chr10 4359045 4359059 10 chr10 4359059 4359067 13 chr10 4359067 4359086 14 chr10 4359086 4359122 16 chr10 4359122 4359131 17 chr10 4359131 4359137 14 chr10 4359137 4359156 13 chr10 4359156 4359159 4 chr10 4359159 4359185 3 chr10 4359185 4359188 2 chr10 4359188 4581019 0 chr10 4581019 4581030 1 chr10 4581030 8458609 0 chr10 8458609 8458622 1 chr10 8458622 8722264 0 chr10 8722264 8722314 1 chr10 8722314 8722446 0 chr10 8722446 8722507 1 chr10 8722507 8756699 0 chr10 8756699 8756714 6 chr10 8756714 8756761 20 chr10 8756761 8778558 0 chr10 8778558 8778617 1 chr10 8778617 8780518 0 chr10 8780518 8780528 20 chr10 8780528 8780543 14 chr10 8780543 8780567 11 chr10 8780567 8848720 0 chr10 8848720 8848742 1 chr10 8848742 8870679 0 chr10 8870679 8870690 1 chr10 8870690 130694993 0 megadepth-1.2.0/tests/test.bam.mosdepth.annotation.per-base.exon_sums.tsv000066400000000000000000000003231420302544700265730ustar00rootroot00000000000000chr10 
3104118 3104229 0 chr10 4358477 4359470 1781 chr10 8722218 8725760 111 chr10 8729327 8730436 0 chr10 8756628 8756761 1030 chr10 8780518 8780620 674 chr10 130592156 130592705 0 GL000219.1 150000 170000 152 megadepth-1.2.0/tests/test.bam.mosdepth.bwtool.all_aucs000066400000000000000000000001661420302544700231670ustar00rootroot00000000000000ALL_READS_ANNOTATED_BASES 3748 UNIQUE_READS_ANNOTATED_BASES 3543 ALL_READS_ALL_BASES 3864 UNIQUE_READS_ALL_BASES 3576 megadepth-1.2.0/tests/test.bam.mosdepth.unique.per-base.bw.bg000066400000000000000000000014501420302544700240750ustar00rootroot00000000000000GL000219.1 0 168544 0 GL000219.1 168544 168571 1 GL000219.1 168571 168619 2 GL000219.1 168619 168621 1 GL000219.1 168621 179198 0 chr10 0 4359039 0 chr10 4359039 4359045 8 chr10 4359045 4359059 9 chr10 4359059 4359067 12 chr10 4359067 4359086 13 chr10 4359086 4359122 15 chr10 4359122 4359131 16 chr10 4359131 4359137 13 chr10 4359137 4359156 12 chr10 4359156 4359159 4 chr10 4359159 4359185 3 chr10 4359185 4359188 2 chr10 4359188 8722264 0 chr10 8722264 8722314 1 chr10 8722314 8722446 0 chr10 8722446 8722507 1 chr10 8722507 8756699 0 chr10 8756699 8756714 6 chr10 8756714 8756761 20 chr10 8756761 8780518 0 chr10 8780518 8780528 20 chr10 8780528 8780543 14 chr10 8780543 8780567 11 chr10 8780567 8848720 0 chr10 8848720 8848742 1 chr10 8848742 8870679 0 chr10 8870679 8870690 1 chr10 8870690 130694993 0 megadepth-1.2.0/tests/test.bam.mosdepth.unique.per-base.exon_sums.tsv000066400000000000000000000003231420302544700257270ustar00rootroot00000000000000chr10 3104118 3104229 0 chr10 4358477 4359470 1603 chr10 8722218 8725760 111 chr10 8729327 8730436 0 chr10 8756628 8756761 1030 chr10 8780518 8780620 674 chr10 130592156 130592705 0 GL000219.1 150000 170000 125 megadepth-1.2.0/tests/test.bam.names.alts.tsv000066400000000000000000000003451420302544700211340ustar00rootroot000000000000000,168613,X,C,, 1,4359137,S,2,,+,T,2 1,4359137,I,T,, 1,4359121,D,1,, 1,4359121,I,TT,, 1,4359137,I,TTT,, 
1,4359090,S,6,,-,A,1 1,4359188,S,1,,+,T,1 1,4359121,S,4,,-,A,2 1,4359188,S,1,,+,T,1 1,8756741,S,3,,-,A,1 1,8756742,S,4,,-,T,2 megadepth-1.2.0/tests/test.bam.orig.alts.tsv000066400000000000000000000003251420302544700207670ustar00rootroot000000000000000,168613,X,C,, 1,4359137,S,2,,+,T,2 1,4359137,I,T,, 1,4359121,I,TT,, 1,4359137,I,TTT,, 1,4359090,S,6,,-,A,1 1,4359188,S,1,,+,T,1 1,4359121,S,4,,-,A,2 1,4359188,S,1,,+,T,1 1,8756741,S,3,,-,A,1 1,8756742,S,4,,-,T,2 megadepth-1.2.0/tests/test.bam.orig.frags.tsv000066400000000000000000000002541420302544700211270ustar00rootroot0000000000000023853 11 117 9 99 1 77 1 STAT COUNT 22 STAT MEAN_LENGTH 11982.364 STAT MODE_LENGTH 23853 STAT MODE_LENGTH_COUNT 11 STAT KALLISTO_COUNT 11 STAT KALLISTO_MEAN_LENGTH 111.727 megadepth-1.2.0/tests/test.bam.orig.softclip.tsv000066400000000000000000000001031420302544700216410ustar00rootroot0000000000000021 bases softclipped 4711 total number of processed sequence bases megadepth-1.2.0/tests/test.bam.read_ends.both.unique.tsv000066400000000000000000000011431420302544700232500ustar00rootroot00000000000000chr10 4359040 8 chr10 4359046 1 chr10 4359060 3 chr10 4359068 1 chr10 4359085 5 chr10 4359086 1 chr10 4359087 2 chr10 4359088 1 chr10 4359091 1 chr10 4359111 8 chr10 4359114 1 chr10 4359117 1 chr10 4359118 1 chr10 4359122 1 chr10 4359131 3 chr10 4359137 1 chr10 4359156 8 chr10 4359158 1 chr10 4359159 1 chr10 4359185 1 chr10 4359188 2 chr10 8722265 1 chr10 8722447 1 chr10 8756700 6 chr10 8756715 14 chr10 8756739 9 chr10 8756742 1 chr10 8756743 1 chr10 8780528 6 chr10 8780543 14 chr10 8780567 11 chr10 8848742 1 chr10 8870690 1 GL000219.1 168545 1 GL000219.1 168572 1 GL000219.1 168619 1 GL000219.1 168621 1 megadepth-1.2.0/tests/test.cram000066400000000000000000000112631420302544700164410ustar00rootroot00000000000000CRAMtest.cramWH5Ln0{؋~qB *DMW4]gG}RV=io<'..xj$ق ,k0x?\Nle焻v`GfF)Y&|L;b8?Qt6i# '[RJe鮎D rW=ɫB#Ȅz.ԲvX '7gaS_u*յccv }ּPJ?[fiQ 6Bk~ٻ7Jd\Z7dάfRjj;jNZ u]AӠxeޔ4 ꅖi 
TFym>bJn6(eRi3M#ل&]Z96>6x?̞Hu]E6#0bрFaR2? "{({PrSH0:Eo*܁lQhy™^$x8 \l^!̏WkQS~}Y#9WxJ-.yv? 8?м~Nl m6ي|圓嚓lII('s. 6[_Hۜ=++V’aM"S&YTDHXTASMCAMCX0CX1CXMCXOCXGCXAZXTAX0CX1CXMCXOCXGCXTASMCAMCX0CX1CXMCXOCXGCSMRNAPBFCFRLAPRGMFNSNPTSNFTL FNFCXFPBSIN BB*%MQRN QS RISC XTA XTAXOC XOCXMC XMCAMC AMCSMC SMCXGC XGCX0C X0CXAZ XAZX1C X1CMYNN’aM  XTAXOCXMCAMCSMCXGCX0CXAZX1Cu`t m Ή%/ Oru 047000221255246134U6`* Er' P 0 YI "DC FuI577s.Y"3H`dGZ^QүqKfh DRZcefxBc`$uy>uc`dSZ:rcPUݶ\c36/4Tc`bp :cd${$Ktcff4*R c`dThXTA /LXOCc``AiyVXMCc`bp.AMCc`AeSMCcP 0PXGCc``A㯗X0Ccfdf(1XAZLIMʱ  x1 N&*k|U(?|04I"Ik~('X1Ccd`%q@xO 17V#](Ng TD NHCHICASCnMCSMRNAPBFCFRLHAPRGMFNSNPTSNFTLFNFCFPBSIN DLBATBB*%MQRN QS RISCRS"InMC nMCHIC HICNHC NHCASC ASCWCC17V#] "nMCHICNHCASCJ=?m0cG8/ <89r y:?h\ tUuhV"7@e-ˇ4+n4VREӫ9aFUr 턎rdt15=U{tpc6Ay8?g!ޭDYqyğxgWFQ`SQ+(J[0{ddN"GC<#Ͼ܁M3CdJfy5Ĥ:kpD}H6PgY}DP0jI&Ѿf`8ʭjXXs:5ء//#(Ka8N ((XXXXl>"   a a a_C -A 0x,)ns'M2ysygypREA 00D lȈKD܋u9sCDDDDDDy0i1]mA 0 0\/*بX"d]4]nc8w `[7q1 0220002<]pyö9@ g@ P8{.:na3iAU @1m|:gKlI$I$I%I.'I$I$I$IFƶc#1cD Ȁ].1Ξ:!1@фL`5f<'[jNbN34{[^z^r,7r,7x޷@?ԩNu+}O9oځC M04C:::k].%#LVMA 2_dQe2zh?G@_"'!_d_6l#6+FmdF1b8b|FyBtE^ y!/䅼B^ ͅBs\h.tѹ펶b*g_GG>]]Q 0 CK0S*m&VG3+ tF]qB9DIONSXi#,NvT Z5Fo+T7cR]} OzȈYU @4-Xt]0Bn}ߣM)s:8.bD-8GW|y59Ums缾YB:h )%+;3㻙? 
lΑ!0N'N NdAS1K [葜^RC  $?]p\HIC/]} 0%Y* ;K+-d#Ç?;D]weNHC,]cfFLLLXT C,,,v]ASCX]- /0]zw iTV<@O10jvsJ )(i E]WC0EOFOcKmegadepth-1.2.0/tests/test.cram.coverage.tsv000066400000000000000000000033661420302544700210530ustar00rootroot00000000000000GL000219.1 0 168544 0 GL000219.1 168544 168619 2 GL000219.1 168619 168621 1 GL000219.1 168621 179198 0 GL000219.1 0 168544 0 GL000219.1 168544 168571 1 GL000219.1 168571 168619 2 GL000219.1 168619 168621 1 GL000219.1 168621 179198 0 chr10 0 4358517 0 chr10 4358517 4358578 1 chr10 4358578 4359039 0 chr10 4359039 4359045 9 chr10 4359045 4359059 10 chr10 4359059 4359067 13 chr10 4359067 4359086 14 chr10 4359086 4359122 16 chr10 4359122 4359131 17 chr10 4359131 4359137 14 chr10 4359137 4359156 13 chr10 4359156 4359159 4 chr10 4359159 4359185 3 chr10 4359185 4359188 2 chr10 4359188 4581019 0 chr10 4581019 4581030 1 chr10 4581030 8458609 0 chr10 8458609 8458622 1 chr10 8458622 8722264 0 chr10 8722264 8722314 1 chr10 8722314 8722446 0 chr10 8722446 8722507 1 chr10 8722507 8756699 0 chr10 8756699 8756714 6 chr10 8756714 8756761 20 chr10 8756761 8778558 0 chr10 8778558 8778617 1 chr10 8778617 8780518 0 chr10 8780518 8780528 20 chr10 8780528 8780543 14 chr10 8780543 8780567 11 chr10 8780567 8848720 0 chr10 8848720 8848742 1 chr10 8848742 8870679 0 chr10 8870679 8870690 1 chr10 8870690 130694993 0 chr10 0 4359039 0 chr10 4359039 4359045 8 chr10 4359045 4359059 9 chr10 4359059 4359067 12 chr10 4359067 4359086 13 chr10 4359086 4359122 15 chr10 4359122 4359131 16 chr10 4359131 4359137 13 chr10 4359137 4359156 12 chr10 4359156 4359159 4 chr10 4359159 4359185 3 chr10 4359185 4359188 2 chr10 4359188 8722264 0 chr10 8722264 8722314 1 chr10 8722314 8722446 0 chr10 8722446 8722507 1 chr10 8722507 8756699 0 chr10 8756699 8756714 6 chr10 8756714 8756761 20 chr10 8756761 8780518 0 chr10 8780518 8780528 20 chr10 8780528 8780543 14 chr10 8780543 8780567 11 chr10 8780567 8848720 0 chr10 8848720 8848742 1 chr10 8848742 8870679 0 chr10 8870679 8870690 1 chr10 
8870690 130694993 0 megadepth-1.2.0/tests/test.cram.crai000066400000000000000000000001021420302544700173440ustar00rootroot00000000000000 ʱ0 \c08U8-8 M918megadepth-1.2.0/tests/test.cram.window.summed.tsv000066400000000000000000000000321420302544700220430ustar00rootroot00000000000000GL000219.1 152 chr10 3712 megadepth-1.2.0/tests/test.sam000066400000000000000000000410601420302544700162750ustar00rootroot00000000000000@HD VN:1.0 GO:none SO:coordinate @HD VN:1.4 SO:coordinate @SQ SN:GL000219.1 LN:179198 @SQ SN:chr10 LN:130694993 @RG ID:heart_50_fcb_2 PL: PU: ST: LB: DS: SM:heart CN: @RG ID:heart_75_fca PL: PU: ST: LB: DS: SM:heart CN: @PG ID:bwa PN:bwa VN:0.5.9-r16 @PG ID:STAR PN:STAR VN:STAR_2.6.1c CL:STAR --runMode alignReads --runThreadN 20 --genomeDir indexes/mouse38 --readFilesType Fastx --readFilesIn SRR579545_1.fastq.gz SRR579545_2.fastq.gz --readFilesCommand zcat --outTmpDir ./tmp --outReadsUnmapped Fastx --outMultimapperOrder Random --outSAMtype BAM Unsorted --outSAMmode NoQS --outSAMreadID Number --twopassMode None @CO user command line: STAR --runMode alignReads --runThreadN 20 --genomeDir indexes/mouse38 --readFilesIn SRR579545_1.fastq.gz SRR579545_2.fastq.gz --readFilesCommand zcat --twopassMode None --outReadsUnmapped Fastx --outMultimapperOrder Random --outSAMreadID Number --readFilesType Fastx --outTmpDir ./tmp --outSAMtype BAM Unsorted --outSAMmode NoQS HWI-BRUNOP16X_0001:7:43:5508:134615#0 99 GL000219.1 168545 0 50M = 168572 77 TTGGAATCCTATGTGAGGGACAAACATTCAGACCCTAGTTGCAGTGTTCT ggghgggggggggggggggggggggggggggggggggggggggggggggg RG:Z:heart_50_fcb_2 XT:A:R NM:i:0 SM:i:0 AM:i:0 X0:i:3 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:50 XA:Z:GL000199.1,+5644,50M,0;GL000219.1,+168545,50M,0;GL000199.1,+3348,50M,1; HWI-BRUNOP16X_0001:7:62:8533:144262#0 0 GL000219.1 168545 37 75M * 0 0 NNGGAATCCTATGTGAGGGACAAACATTCAGACCCTAGTTGCAGTGTTCTGGAATCTAATGTGAGGGACAAACAT BBJIJIIJJJeeeeeZ\^^[eeeeeeeeeeeeeeeeeeee^\^\\^\[[V^^\X\^V\\^R^^TVeeeeeee[ee RG:Z:heart_75_fca XT:A:U NM:i:2 X0:i:1 
X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:0T0T73 HWI-BRUNOP16X_0001:7:43:5508:134615#0 147 GL000219.1 168572 37 50M = 168545 -77 TCAGACCCTAGTTGCAGTGTTCTGGAATCTAATGTGAGGGACCAACATTG gggggfggggggggggggggggggggggggggggggggggggaggggggg RG:Z:heart_50_fcb_2 XT:A:U NM:i:1 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:42A7 17304371 355 chr10 3225493 1 38M62440N34M = 3581107 355670 GACTCAATTCCCCAATAAAAAGACATAGACTAACAGACTGGCTACTCCAGCTTGTTTCTTCAGACCACTTGC * NH:i:3 HI:i:3 AS:i:113 nM:i:1 19540155 419 chr10 4195103 1 10M324070N62M = 4519214 324182 CTACTCCTGTCACCATGACCAAAAAGCAGGCTGGGGGAGGAAAGGGTTAATTCAGTTTACACTTCCAGATCA * NH:i:3 HI:i:2 AS:i:130 nM:i:0 19905142 419 chr10 4195103 1 10M324070N62M = 4519213 324182 CTACTCCTGTCACCATGACCAAAAAGCAGGCTGGGGGAGGAAAGGGTTAATTCAGTTTACACTTCCAGATCA * NH:i:3 HI:i:3 AS:i:131 nM:i:0 27213900 419 chr10 4195103 1 10M324070N62M = 4519213 324182 CTACTCCTGTCACCATGACCAAAAAGCAGGCTGGGGGAGGAAAGGGTTAATTCAGTTTACACTTCCAGATCA * NH:i:3 HI:i:3 AS:i:131 nM:i:0 42471489 419 chr10 4195103 1 10M324070N62M = 4519213 324182 CTACTCCTGTCACCATGACCAAAAAGCAGGCTGGGGGAGGAAAGGGTTAATTCAGTTTACACTTCCAGATCA * NH:i:3 HI:i:2 AS:i:131 nM:i:0 68651645 419 chr10 4195103 1 10M324070N62M = 4519213 324182 CTACTCCTGTCACCATGACCAAAAAGCAGGCTGGGGGAGGAAAGGGTTAATTCAGTTTACACTTCCAGATCA * NH:i:3 HI:i:2 AS:i:131 nM:i:0 73197705 419 chr10 4195103 1 10M324070N62M = 4519213 324182 CTACTCCTGTCACCATGACCAAAAAGCAGGCTGGGGGAGGAAAGGGTTAATTCAGTTTACACTTCCAGATCA * NH:i:3 HI:i:3 AS:i:131 nM:i:0 73334041 419 chr10 4195103 1 10M324070N62M = 4519213 324182 CTACTCCTGTCACCATGACCAAAAAGCAGGCTGGGGGAGGAAAGGGTTAATTCAGTTTACACTTCCAGATCA * NH:i:3 HI:i:3 AS:i:129 nM:i:1 19540155 419 chr10 4246663 1 10M272510N62M = 4519214 272622 CTACTCCTGTCACCATGACCAAAAAGCAGGCTGGGGGAGGAAAGGGTTAATTCAGTTTACACTTCCAGATCA * NH:i:3 HI:i:3 AS:i:130 nM:i:0 19905142 419 chr10 4246663 1 10M272510N62M = 4519213 272622 CTACTCCTGTCACCATGACCAAAAAGCAGGCTGGGGGAGGAAAGGGTTAATTCAGTTTACACTTCCAGATCA * NH:i:3 HI:i:2 AS:i:131 nM:i:0 27213900 419 chr10 4246663 1 
10M272510N62M = 4519213 272622 CTACTCCTGTCACCATGACCAAAAAGCAGGCTGGGGGAGGAAAGGGTTAATTCAGTTTACACTTCCAGATCA * NH:i:3 HI:i:2 AS:i:131 nM:i:0 42471489 419 chr10 4246663 1 10M272510N62M = 4519213 272622 CTACTCCTGTCACCATGACCAAAAAGCAGGCTGGGGGAGGAAAGGGTTAATTCAGTTTACACTTCCAGATCA * NH:i:3 HI:i:3 AS:i:131 nM:i:0 68651645 419 chr10 4246663 1 10M272510N62M = 4519213 272622 CTACTCCTGTCACCATGACCAAAAAGCAGGCTGGGGGAGGAAAGGGTTAATTCAGTTTACACTTCCAGATCA * NH:i:3 HI:i:3 AS:i:131 nM:i:0 73197705 419 chr10 4246663 1 10M272510N62M = 4519213 272622 CTACTCCTGTCACCATGACCAAAAAGCAGGCTGGGGGAGGAAAGGGTTAATTCAGTTTACACTTCCAGATCA * NH:i:3 HI:i:2 AS:i:131 nM:i:0 73334041 419 chr10 4246663 1 10M272510N62M = 4519213 272622 CTACTCCTGTCACCATGACCAAAAAGCAGGCTGGGGGAGGAAAGGGTTAATTCAGTTTACACTTCCAGATCA * NH:i:3 HI:i:2 AS:i:129 nM:i:1 26573693 147 chr10 4358518 3 61M222441N11M = 4358476 -222555 ACAAGATGCTGCTGTTGGGACCTTGAGACCAAAATTTCAGAGCCCTTGAGGTGCAGAGAGCAACTCACGTCT * NH:i:2 HI:i:1 AS:i:116 nM:i:4 721428 163 chr10 4359040 255 72M = 4359085 117 TGGTTGTTTTCCTATGCACAGTGAGCTCAGAAATAAAAACTCCATTTTGAGACATTCAGAATGTCCCAACAT * NH:i:1 HI:i:1 AS:i:142 nM:i:0 28525348 163 chr10 4359040 3 72M = 4359087 117 TGGTTGTTTTCCTATGCACAGTGAGCTCAGAAATAAAAACTCCATTTTGAGACATTCAGAATGTCCCAACAT * NH:i:2 HI:i:1 AS:i:132 nM:i:1 28525348 419 chr10 4359040 3 72M = 4359087 117 TGGTTGTTTTCCTATGCACAGTGAGCTCAGAAATAAAAACTCCATTTTGAGACATTCAGAATGTCCCAACAT * NH:i:2 HI:i:2 AS:i:132 nM:i:1 32787505 163 chr10 4359040 255 72M = 4359085 117 TGGTTGTTTTCCTATGCACAGTGAGCTCAGAAATAAAAACTCCATTTTGAGACATTCAGAATGTCCCAACAT * NH:i:1 HI:i:1 AS:i:132 nM:i:5 37603910 163 chr10 4359040 255 72M = 4359085 117 TGGTTGTTTTCCTATGCACAGTGAGCTCAGAAATAAAAACTCCATTTTGAGACATTCAGAATGTCCCAACAT * NH:i:1 HI:i:1 AS:i:132 nM:i:5 46363291 163 chr10 4359040 255 72M = 4359091 117 TGGTTGTTTTCCTATGCACAGTGAGCTCAGAAATAAAAACTCCATTTTGAGACATTCAGAATGTCCCAACAT * NH:i:1 HI:i:1 AS:i:116 nM:i:10 48573388 163 chr10 4359040 255 72M = 4359085 117 
TGGTTGTTTTCCTATGCACAGTGAGCTCAGAAATAAAAACTCCATTTTGAGACATTCAGAATGTCCCAACAT * NH:i:1 HI:i:1 AS:i:140 nM:i:1 51738339 163 chr10 4359040 255 72M = 4359086 117 TGGTTGTTTTCCTATGCACAGTGAGCTCAGAAATAAAAACTCCATTTTGAGACATTCAGAATGTCCCAACAT * NH:i:1 HI:i:1 AS:i:137 nM:i:0 76813656 163 chr10 4359040 255 72M = 4359085 117 TGGTTGTTTTCCTATGCACAGTGAGCTCAGAAATAAAAACTCCATTTTGAGACATTCAGAATGTCCCAACAT * NH:i:1 HI:i:1 AS:i:122 nM:i:10 77352856 163 chr10 4359040 255 72M = 4359088 117 TGGTTGTTTTCCTATGCACAGTGAGCTCAGAAATAAAAACTCCATTTTGAGACATTCAGAATGTCCCAACAT * NH:i:1 HI:i:1 AS:i:127 nM:i:2 17428017 147 chr10 4359046 255 72M = 4358909 -209 TTTTCCTATGCACAGTGAGCTCAGAAATAAAAACTCCATTTTGAGACATTCAGAATGTCCCAACATTACGAC * NH:i:1 HI:i:1 AS:i:142 nM:i:0 26223068 163 chr10 4359060 255 72M = 4359154 273 GTGAGCTCAGAAATAAAAACTCCATTTTGAGACATTAAGAATGTCCCAACATTACGACAACATTTTTTTTTT * NH:i:1 HI:i:1 AS:i:132 nM:i:1 64521410 163 chr10 4359060 255 72M = 4359154 273 GTGAGCTCAGAAATAAAAACTCCATTTTGAGACATTAAGAATGTCCCAACATTACGACAACATTTTTTTTTT * NH:i:1 HI:i:1 AS:i:132 nM:i:1 68186151 163 chr10 4359060 255 72M = 4359154 273 GTGAGCTCAGAAATAAAAACTCCATTTTGAGACATTCAGAATGTCCCAACATTACGACAACATTTTTTTTTT * NH:i:1 HI:i:1 AS:i:134 nM:i:0 1386481 163 chr10 4359068 255 70M2S = 4359308 312 AGATATAAAAACTCCATTTTGAGACATTCAGAATGTCCCAACATTACGACAACATTTTTTTTTTTTTTTTTT * NH:i:1 HI:i:1 AS:i:138 nM:i:1 721428 83 chr10 4359085 255 72M = 4359040 -117 TTTGAGACATTCAGAATGTCCCAACATTACGACAACATTTTTTTTTTTTTTTTCTAATCCAGTCCAGGTTGG * NH:i:1 HI:i:1 AS:i:142 nM:i:0 32787505 83 chr10 4359085 255 72M = 4359040 -117 TTTGGAACATTCAGATTTTCCCAACCTTACGACAACATTTTTTTTTTTTTTTTCTAATCCAGTCCAGGTTGG * NH:i:1 HI:i:1 AS:i:132 nM:i:5 37603910 83 chr10 4359085 255 72M = 4359040 -117 TTTTAGAAATTCAAAATTTCCCAAAATTACGACAACATTTTTTTTTTTTTTTTCTAATCCAGTCCAGGTTGG * NH:i:1 HI:i:1 AS:i:132 nM:i:5 48573388 83 chr10 4359085 255 72M = 4359040 -117 TTTGAGACATTCAGAATTTCCCAACATTACGACAACATTTTTTTTTTTTTTTTCTAATCCAGTCCAGGTTGG * NH:i:1 HI:i:1 AS:i:140 nM:i:1 76813656 83 chr10 4359085 255 72M = 4359040 
-117 TTTGGAAAATTGCAAATGTCCCAACATTTCCGAAACATTTTTTTTTTTTTTTTCTAATCCAGTCCAGGTTGG * NH:i:1 HI:i:1 AS:i:122 nM:i:10 51738339 83 chr10 4359086 255 52M1I19M = 4359040 -117 TTGAGACATTCAGAATGTCCCAACATTACGACAACATTTTTTTTTTTTTTTTTCTAATCCAGTCCAGGTTGG * NH:i:1 HI:i:1 AS:i:137 nM:i:0 9978511 163 chr10 4359087 255 72M = 4359114 99 TGGGACATTCTGAATGTCTCAACATTACGACAACATTTTTTTTTTTTTTTTTTAATCCAGTCCAGGTTGGAA * NH:i:1 HI:i:1 AS:i:134 nM:i:4 52814524 163 chr10 4359087 255 35M1D37M = 4359123 99 TGGGACATTCTGAATGTCTCAACATTACGACAACATTTTTTTTTTTTTTTCTAATCCAGTCCAGGTTGGAAA * NH:i:1 HI:i:1 AS:i:123 nM:i:3 28525348 83 chr10 4359087 3 35M2I35M = 4359040 -117 TGAGACATTCAGAATGTCCCACCATTACGACAACATTTTTTTTTTTTTTTTTTCTAATCCAGTCCAGGTTGG * NH:i:2 HI:i:1 AS:i:132 nM:i:1 28525348 339 chr10 4359087 3 37M2I33M = 4359040 -117 TGAGACATTCAGAATGTCCCACCATTACGACAACATTTTTTTTTTTTTTTTTTCTAATCCAGTCCAGGTTGG * NH:i:2 HI:i:2 AS:i:132 nM:i:1 77352856 83 chr10 4359088 255 50M3I19M = 4359040 -117 GAGACATTCAGAAATTCCCAACATTACGACAACATTTTTTTTTTTTTTTTTTTCTAATCCAGTCCAGGTTGG * NH:i:1 HI:i:1 AS:i:127 nM:i:2 46363291 83 chr10 4359091 255 6S66M = 4359040 -117 TGTGATACATTAAGAACCACCCATCAGTACGACATTTTTTTTTTTTTTTCTTTCTAATCCAGTCCAGGTTGG * NH:i:1 HI:i:1 AS:i:116 nM:i:10 9978511 83 chr10 4359114 255 72M = 4359087 -99 CGACAACATTTTTTTTTTTTTTTTCTAATCCAGTCCAGGTTGGAAAGAAGTCTCCTTAGTGTCAGATTAAGC * NH:i:1 HI:i:1 AS:i:134 nM:i:4 56876898 83 chr10 4359118 255 71M1S = 4359007 -182 AAAAATTGTTTGTTTTTTTTCTAATCCAGTCCAGGTTGGAAAGAAGTCTCCTTAGTGTCAGATTAAGCCCCT * NH:i:1 HI:i:1 AS:i:129 nM:i:4 41734196 83 chr10 4359122 255 4S67M1S = 4359007 -182 ACATTTTTTTTTTTTTTTTTCTAATCCAGTCCAGGTTGGAAAGAAGTCTCCTTAGTGTCAGATTAAGCCCCT * NH:i:1 HI:i:1 AS:i:133 nM:i:0 59413733 163 chr10 8458610 1 13M319936N59M = 8778565 320027 CACACACAACCTTGGGGTTGGGGATTTAGCTCAGTGGTAGAGCGCTTGCCTAGCAAGCGCAAGGCCCTGGGT * NH:i:3 HI:i:1 AS:i:131 nM:i:0 63502504 83 chr10 8722265 255 50M126406N22M = 8722217 -126526 TAATTATAGACAAGTTTTGATACACAGGAAAACCCTTCTGTCTACCTTCCATTTAAAAAAAAAAAAAAAAAG * NH:i:1 HI:i:1 
AS:i:109 nM:i:4 15130473 147 chr10 8722447 255 61M148172N11M = 8722427 -148264 GTTTAAGAATAGCAATGGAGAAAAATAAGTTATTTAAATATTGATTTCATATACAGAAAGTAGCTGTAATAT * NH:i:1 HI:i:1 AS:i:132 nM:i:0 3043745 339 chr10 8729842 3 63M98397N9M = 8729780 -98531 GGCTGCTGGGCCTCGAAATCTCTGGGTATACACTGAGGAGGAGCTATCCCAAGGTTGGAGGCCCAGCCGCCT * NH:i:2 HI:i:2 AS:i:130 nM:i:1 9926081 339 chr10 8729842 3 63M98397N9M = 8729780 -98531 GGCGGCTGGGCCTCGAAATCTCTGGGTATACACTGAGGAGGAGCTATCCCAAGGTTGGAGGCCCAGCCGCCT * NH:i:2 HI:i:2 AS:i:132 nM:i:0 17688638 339 chr10 8729842 3 63M98397N9M = 8729780 -98531 GGCGGCTGGGCCTCGAAATCTCTGGGTATACACTGAGGAGGAGCTATCCCAAGGTTGGAGGCCCAGCCGCCT * NH:i:2 HI:i:2 AS:i:130 nM:i:1 20097762 339 chr10 8729842 3 63M98397N9M = 8729780 -98531 GGCGGCTGGGCCTCGAAATCTCTGGGTATACACTGAGGAGGAGCTATCCCAAGGTTGGAGGCCCAGCCGCCT * NH:i:2 HI:i:2 AS:i:132 nM:i:0 26342809 339 chr10 8729842 3 63M98397N9M = 8729780 -98531 GGCGGCTGGGCCTCGAAATCTCTGGGTATACACTGAGGAGGAGCTATCCCAAGGTTGGAGGCCCAGCCGCCT * NH:i:2 HI:i:2 AS:i:130 nM:i:1 29317198 339 chr10 8729842 3 63M98397N9M = 8729780 -98531 GGCGGCTGGGCCCCGAAATCTCTGGGTATACACTGAGGAGGAGCTATCCCAAGGTTGGAGGCCCAGCCGCCT * NH:i:2 HI:i:2 AS:i:128 nM:i:2 29889276 339 chr10 8729842 3 63M98397N9M = 8729780 -98531 GGCGGCTGGGCCTCGAAATCTCTGGGTATACACTGAGGAGGAGCTATCCCAAGGTTGGAGGCCCAGCCGCCT * NH:i:2 HI:i:2 AS:i:132 nM:i:0 35949106 339 chr10 8729842 3 63M98397N9M = 8729780 -98531 GGCGGCTGGGCCTCGAAATCTCTGGGTATACACTGAGGAGGAGCTATCCCAAGGTTGGAGGCCCAGCCGCCT * NH:i:2 HI:i:2 AS:i:132 nM:i:0 44003641 339 chr10 8729842 3 63M98397N9M = 8729780 -98531 GGCGGCTGGGCCTCGAAATCTCTGGGTATACACTGAGGAGGAGCTATCCCAAGGTTGGAGGCCCAGCCGCCT * NH:i:2 HI:i:2 AS:i:132 nM:i:0 50576661 339 chr10 8729844 3 2S61M98397N9M = 8729780 -98531 GCCGGCGGGGCCTCGAAATCTCTGGGTATACACTGAGGAGGAGCTATCCCAAGGTTGGAGGCCCAGCCGCCT * NH:i:2 HI:i:2 AS:i:126 nM:i:2 22331161 83 chr10 8756700 255 62M23757N10M = 8756679 -23850 AGCTTCTGCACCCTCTTAAACTTCACCGATTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTG * NH:i:1 HI:i:1 AS:i:124 nM:i:4 37510913 83 chr10 
8756700 255 62M23757N10M = 8756679 -23850 AGCTGCTGCACCCTCTTAAACTTCACCGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTG * NH:i:1 HI:i:1 AS:i:124 nM:i:4 38798461 83 chr10 8756700 255 62M23757N10M = 8756679 -23850 AGCTTGTGCAACCTCTTAAACTTCACCGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTG * NH:i:1 HI:i:1 AS:i:130 nM:i:1 62329988 83 chr10 8756700 255 62M23757N10M = 8756679 -23850 AGCTTGTGCAACCTCTGAAACTTCACAGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTG * NH:i:1 HI:i:1 AS:i:126 nM:i:3 66789502 83 chr10 8756700 255 62M23757N10M = 8756679 -23850 AGCTTGGCCAACCTCTTAAGCTTCTGCGCGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTG * NH:i:1 HI:i:1 AS:i:118 nM:i:7 78396176 83 chr10 8756700 255 62M23757N10M = 8756679 -23850 AGCTTGTGCAACCTCTTAAACTTCACCGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTG * NH:i:1 HI:i:1 AS:i:130 nM:i:1 5014742 163 chr10 8756715 255 47M23757N25M = 8756739 23853 TTAAACTTCACCGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCCAT * NH:i:1 HI:i:1 AS:i:138 nM:i:1 4530127 163 chr10 8756715 255 47M23757N25M = 8756739 23853 TTAAACTTCACCGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAAT * NH:i:1 HI:i:1 AS:i:138 nM:i:1 18685945 163 chr10 8756715 255 47M23757N25M = 8756739 23853 TTAAACTTCACCGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAAT * NH:i:1 HI:i:1 AS:i:140 nM:i:0 28275148 163 chr10 8756715 255 47M23757N25M = 8780519 23853 TTAAACTTCACCGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAAT * NH:i:1 HI:i:1 AS:i:111 nM:i:3 35248359 163 chr10 8756715 255 47M23757N25M = 8756742 23853 TTAAACTTCACCGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGGGGAGGAGCCATCCAAT * NH:i:1 HI:i:1 AS:i:133 nM:i:2 36050844 163 chr10 8756715 255 47M23757N25M = 8756739 23853 TTAAACTTCACCGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAAT * NH:i:1 HI:i:1 AS:i:140 nM:i:0 43161047 163 chr10 8756715 255 47M23757N25M = 8756739 23853 TTAAACTTCACCGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAAT * NH:i:1 HI:i:1 AS:i:140 nM:i:0 52182721 163 chr10 8756715 255 
47M23757N25M = 8780519 23853 TTAAACTTCACCGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAAT * NH:i:1 HI:i:1 AS:i:117 nM:i:0 58902510 163 chr10 8756715 255 47M23757N25M = 8756739 23853 TTAAACTTCACCGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAAT * NH:i:1 HI:i:1 AS:i:138 nM:i:1 61632745 163 chr10 8756715 255 47M23757N25M = 8756739 23853 TTAAACTTCACCGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAAT * NH:i:1 HI:i:1 AS:i:140 nM:i:0 63835388 163 chr10 8756715 255 47M23757N25M = 8756743 23853 TTAAACTTCACCGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCCAT * NH:i:1 HI:i:1 AS:i:134 nM:i:1 71287855 163 chr10 8756715 255 47M23757N25M = 8756739 23853 TTAAACTTCACCGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAAT * NH:i:1 HI:i:1 AS:i:140 nM:i:0 78165236 163 chr10 8756715 255 47M23757N25M = 8756739 23853 TTAAACTTCACCGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAAT * NH:i:1 HI:i:1 AS:i:140 nM:i:0 76181825 163 chr10 8756715 255 47M23757N25M = 8780519 23853 TTAAACTTCACCGAGTCCTCTGTCTCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAAT * NH:i:1 HI:i:1 AS:i:117 nM:i:0 5014742 83 chr10 8756739 255 23M23757N49M = 8756715 -23853 TCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAATGTCGGGTAGGAGCCATCGGGCCAG * NH:i:1 HI:i:1 AS:i:138 nM:i:1 4530127 83 chr10 8756739 255 23M23757N49M = 8756715 -23853 TCATCACCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAATGTCGGGTAGGAGCCATCGGGCCAG * NH:i:1 HI:i:1 AS:i:138 nM:i:1 18685945 83 chr10 8756739 255 23M23757N49M = 8756715 -23853 TCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAATGTCGGGTAGGAGCCATCGGGCCAG * NH:i:1 HI:i:1 AS:i:140 nM:i:0 36050844 83 chr10 8756739 255 23M23757N49M = 8756715 -23853 TCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAATGTCGGGTAGGAGCCATCGGGCCAG * NH:i:1 HI:i:1 AS:i:140 nM:i:0 43161047 83 chr10 8756739 255 23M23757N49M = 8756715 -23853 TCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAATGTCGGGTAGGAGCCATCGGGCCAG * NH:i:1 HI:i:1 AS:i:140 nM:i:0 58902510 83 chr10 8756739 255 23M23757N49M = 8756715 
-23853 TCACCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAATGTCGGGTAGGAGCCATCGGGCCAG * NH:i:1 HI:i:1 AS:i:138 nM:i:1 61632745 83 chr10 8756739 255 23M23757N49M = 8756715 -23853 TCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAATGTCGGGTAGGAGCCATCGGGCCAG * NH:i:1 HI:i:1 AS:i:140 nM:i:0 71287855 83 chr10 8756739 255 23M23757N49M = 8756715 -23853 TCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAATGTCGGGTAGGAGCCATCGGGCCAG * NH:i:1 HI:i:1 AS:i:140 nM:i:0 78165236 83 chr10 8756739 255 23M23757N49M = 8756715 -23853 TCATCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAATGTCGGGTAGGAGCCATCGGGCCAG * NH:i:1 HI:i:1 AS:i:140 nM:i:0 35248359 83 chr10 8756742 255 3S20M23757N49M = 8756715 -23853 TACTCATCCGACTGCTCTCTTGATTTGCAGGTGGAGGAGCCATCCAATGTCGGGTAGGAGCCATCGGGCCAG * NH:i:1 HI:i:1 AS:i:133 nM:i:2 63835388 83 chr10 8756743 255 4S19M23757N49M = 8756715 -23853 TCTCCATCCGACTGCTCTCTTGAGTTGCAGGTGGAGGAGCCATCCAATGTCGGGTAGGAGCCATCGGGCCAG * NH:i:1 HI:i:1 AS:i:134 nM:i:1 29133969 355 chr10 8763812 1 53M274N19M = 8764156 416 CCTGTGGCTCTGTCCTGCGGTTCTGTCCTGCGGCTCTGTCCTGCGGTTCTGGCCCTGTGGCTCTGTCCTGCG * NH:i:4 HI:i:2 AS:i:138 nM:i:2 29133969 355 chr10 8763812 1 53M130N19M = 8764012 272 CCTGTGGCTCTGTCCTGCGGTTCTGTCCTGCGGCTCTGTCCTGCGGTTCTGGCCCTGTGGCTCTGTCCTGCG * NH:i:4 HI:i:3 AS:i:138 nM:i:2 29133969 355 chr10 8763812 1 53M130N19M = 8764156 416 CCTGTGGCTCTGTCCTGCGGTTCTGTCCTGCGGCTCTGTCCTGCGGTTCTGGCCCTGTGGCTCTGTCCTGCG * NH:i:4 HI:i:4 AS:i:138 nM:i:2 megadepth-1.2.0/tests/test.sh000077500000000000000000000157111420302544700161360ustar00rootroot00000000000000#!/usr/bin/env bash set -xe static=$1 if [[ -z $static ]]; then time ./megadepth http://stingray.cs.jhu.edu/data/temp/test.bam --prefix test.bam --threads 4 --bigwig --auc --min-unique-qual 10 --annotation tests/test_exons.bed --frag-dist --alts --include-softclip --only-polya --read-ends --test-polya --no-annotation-stdout --no-auc-stdout --filter-out 260 --add-chr-prefix human > test_run_out 2>&1 time ./megadepth http://stingray.cs.jhu.edu/data/temp/test.cram 
--prefix test.cram --threads 4 --coverage --no-coverage-stdout --auc --min-unique-qual 10 --annotation 400 --frag-dist --alts --include-softclip --only-polya --read-ends --test-polya --no-annotation-stdout --no-auc-stdout --filter-out 260 > test_cram_run_out 2>&1 else time ./megadepth tests/test_noprefix.bam --prefix test.bam --threads 4 --bigwig --auc --min-unique-qual 10 --annotation tests/test_exons.bed --frag-dist --alts --include-softclip --only-polya --read-ends --test-polya --no-annotation-stdout --no-auc-stdout --filter-out 260 --add-chr-prefix human > test_run_out 2>&1 time ./megadepth tests/test.cram --prefix test.cram --threads 4 --coverage --no-coverage-stdout --auc --min-unique-qual 10 --annotation 400 --frag-dist --alts --include-softclip --only-polya --read-ends --test-polya --no-annotation-stdout --no-auc-stdout --filter-out 260 > test_cram_run_out 2>&1 fi diff <(sort tests/test.bam.orig.frags.tsv) <(sort test.bam.frags.tsv) diff tests/test.bam.orig.alts.tsv test.bam.alts.tsv diff tests/test.bam.orig.softclip.tsv test.bam.softclip.tsv for f in annotation unique; do diff tests/test.bam.mosdepth.${f}.per-base.exon_sums.tsv test.bam.${f}.tsv done diff tests/test.bam.mosdepth.bwtool.all_aucs test.bam.auc.tsv #test base coverage other than BigWigs diff tests/test.cram.coverage.tsv test.cram.coverage.tsv #test --annotation cut -f 1,4 test.cram.window.tsv | perl -ne 'chomp; ($c,$v)=split(/\t/,$_); $h{$c}+=$v; END { for $c (sort keys %h) { print "$c\t".$h{$c}."\n"; }}' > test.cram.window.summed.tsv diff tests/test.cram.window.summed.tsv test.cram.window.summed.tsv #check --op mean with BAMs ./megadepth tests/test.bam --annotation tests/test_exons.bed --op mean --add-chr-prefix human > test.bam.mean paste <(cut -f 4 test.bam.annotation.tsv) <(cut -f 2- test.bam.mean) | perl -ne 'chomp; $f=$_; ($sum,$s,$e,$m)=split(/\t/,$_); $d=($e-$s); $m2=$sum/$d; $m2=sprintf("%.2f",$m2); if($m != $m2) { print "$f\n"; $ret=1;} END { exit($ret); }' ./megadepth tests/test.bam 
| fgrep "ALL_READS_ALL_BASES" > auc.single diff auc.single <(fgrep "ALL_READS_ALL_BASES" tests/test.bam.mosdepth.bwtool.all_aucs) cat test.bam.starts.tsv test.bam.ends.tsv | sort -k1,1 -k2,2n -k3,3n > test_starts_ends.tsv diff test_starts_ends.tsv <(sort -k1,1 -k2,2n -k3,3n tests/test.bam.read_ends.both.unique.tsv) time ./megadepth tests/test2.bam --threads 4 --junctions --all-junctions --prefix test2.bam > test2_run_out 2>&1 diff tests/test2.bam.jxs.tsv test2.bam.jxs.tsv diff tests/test2.bam.all_jxs.tsv test2.bam.all_jxs.tsv #test just total auc time ./megadepth test.bam.all.bw | grep "AUC" > test.bw1.total_auc diff test.bw1.total_auc tests/testbw1.total_auc #test bigwig2sums/auc time ./megadepth test.bam.all.bw --annotation tests/testbw1.bed --auc --prefix test.bam.bw1 --no-annotation-stdout --no-auc-stdout diff test.bam.bw1.annotation.tsv tests/testbw1.bed.out.tsv diff test.bam.bw1.auc.tsv tests/testbw1.annot_auc ##use different order in BED file from what's in BW to test keep_order == true time ./megadepth test.bam.all.bw --annotation tests/testbw2.bed --auc --prefix test.bam.bw2 --no-annotation-stdout --no-auc-stdout diff test.bam.bw2.annotation.tsv tests/testbw2.bed.out.tsv diff test.bam.bw2.auc.tsv tests/testbw2.annot_auc #test bigwig2mean time ./megadepth test.bam.all.bw --op mean --annotation tests/testbw2.bed --prefix bw2.mean --no-annotation-stdout >> test_run_out 2>&1 diff bw2.mean.annotation.tsv tests/testbw2.bed.mean #test bigwig2min time ./megadepth test.bam.all.bw --op min --annotation tests/testbw2.bed --prefix bw2.min --no-annotation-stdout >> test_run_out 2>&1 diff bw2.min.annotation.tsv tests/testbw2.bed.min #test bigwig2max time ./megadepth test.bam.all.bw --op max --annotation tests/testbw2.bed --prefix bw2.max --no-annotation-stdout >> test_run_out 2>&1 diff bw2.max.annotation.tsv tests/testbw2.bed.max #now test same-start alignments for overlapping pairs ./megadepth tests/test3.bam --auc --coverage --prefix t3 --no-auc-stdout > t3.tsv diff 
<(head -197 tests/test3.out.tsv) t3.tsv diff <(tail -n1 tests/test3.out.tsv) t3.auc.tsv #with uniques ./megadepth tests/test3.bam --coverage --min-unique-qual 10 --bigwig --auc --prefix test3 --no-auc-stdout diff tests/test3.auc.out.tsv test3.auc.tsv #long reads support for junctions ./megadepth tests/long_reads.bam --junctions --prefix long_reads.bam --long-reads diff tests/long_reads.bam.jxs.tsv long_reads.bam.jxs.tsv #test bigwig2sum on remote BW if [[ -z $static ]]; then time ./megadepth http://stingray.cs.jhu.edu/data/temp/megadepth.test.bam.all.bw --op mean --annotation tests/testbw2.bed --prefix bw2.remote.mean --no-annotation-stdout >> test_run_out 2>&1 diff bw2.remote.mean.annotation.tsv tests/testbw2.bed.mean fi ##only print sums use different order in BED file from what's in BW to test keep_order == true time ./megadepth test.bam.all.bw --sums-only --annotation tests/testbw2.bed --prefix test.bam.bw2 > test.bam.bw2.annotation.tsv diff test.bam.bw2.annotation.tsv <(cut -f 4 tests/testbw2.bed.out.tsv) ./megadepth http://stingray.cs.jhu.edu/data/temp/test.bam --prefix test.bam.names --threads 4 --alts --write-names --include-softclip --only-polya --test-polya --no-annotation-stdout --no-auc-stdout --filter-out 260 --add-chr-prefix human > test_run_out2 2>&1 diff test.bam.names.alts.tsv tests/test.bam.names.alts.tsv #test multiple overlap types for BigWig annotation processing ./megadepth tests/bw.all_overlap_types.test_input.bw --annotation tests/gh_bug_9.bed --auc > test.bw.all_overlap_types.test_output.bed diff test.bw.all_overlap_types.test_output.bed tests/bw.all_overlap_types.test_output.bed #test faster mode with collapsed intervals in BigWig annotation processing ./megadepth tests/TCGA_BLCA_A13J.vcf.gz_cg_cov5.bw.bg.gz.chr1.60379.62229.bw --annotation tests/chr1.61863.62160.bed --no-annotation-stdout --prefix TCGA_BLCA_A13J_vs_chr1.61863.62160 diff TCGA_BLCA_A13J_vs_chr1.61863.62160.annotation.tsv 
tests/TCGA_BLCA_A13J_vs_chr1.61863.62160.annotation.tsv #test that we're catching out of order chromosomes output=$(./megadepth tests/TCGA_BLCA_A13J.vcf.gz_cg_cov5.bw.bg.gz.chr1.60379.62229.bw --annotation tests/chr1.61863.62160.bad_chrm_order.bed 2>&1) fgrep "falling back" <(echo "$output") #clean up any previous test files rm -f test*tsv test*auc bw2* test3* test2* t3.* long_reads.bam.jxs.tsv test_run_out *null*.unique.tsv test.*.bw auc.single test.bam.mean test.cram.coverage.tsv test_cram_run_out test.cram.coverage.tsv.summed test.bw.all_overlap_types.test_output.bed TCGA_BLCA_A13J_vs_chr1.61863.62160.annotation.tsv megadepth-1.2.0/tests/test1.bam000066400000000000000000000060741420302544700163430ustar00rootroot00000000000000BCTݎ@ؗ]eaVhaCuߪoЇj:^ s|ߙ0~W1w`2GQ9 aL sdz˰T̤gm7 n]cYFߝEs6CYbb#0"0䳽WA0r˺)[Uf'NVtT&75~[jW50!-b)iZoy5*TZp,H+ff!8%8k؏|u`K-rsx[0bGk)+rҾJǻ8!##V 5s]gsmR]>In/G,M1 SEMBiV]DX8HxMEE"s]Қ(;l hθIAZaK+w9CH7ҳŎe8#:90Ft}见^u!7u1k1z@/cq!سL7|Wu|PQ BC QFݻ%,Q"G\;}&qi/I"AR*ၢ&v|$OZ& $$""!$x@ T6i쵲ڽgvJ3ib #7U>GlxZ4"0_G:%ir"(⪚xRh:|讎O}T?Ϟ6Itn.^iGvxSN(^e@})6p:MMZ>>!%&*fvÌEM` ԃn_r.0g_3D^ZDΧ=Ār/,?E UTe%0ɂ˥0Wy)r.A Z$vHMdռ(J Vx|"F&źI+m˱mcNw]̀%!h]C?[f@SݏHvw 5@; ڲàsjLz?8\lFشL=|QEuV,D]14-_{xS"uKMJtlXwiV&fY2`6a.M@?Wvmin+1=EPôoOQća:mlBuHF\#45ͿQ#MmG7cjz0#5%%Mv40!^56l˦mcF ¦WaG#"}kʣ:טrӼ{3ස3/rW'=]ϬeY2/rs5 Е.:iB4IGA4E χMA绦zfP[& ,C˻O\9!K?/G<%^:-?@/8A{ëqAy~j-~U-tllRyy;%@ن =8grk0CBCmegadepth-1.2.0/tests/test1.bam.bai000066400000000000000000000110601420302544700170640ustar00rootroot00000000000000BAIJS Q" S<"J ]LM<                                                                                                                                                                                                                                                """"""""""""""""""""l#l#l#l#l#!$megadepth-1.2.0/tests/test2.bam000066400000000000000000000016561420302544700163450ustar00rootroot00000000000000BC0T[0M_SJ]7@^ )/ E"_dIKqtҕ]@S *>&Rso|rcVڟα4YE([>,⸲ԟx5k5Ml-huP }MmuӬU V C]˅&# 
Lfu-ķUAsqMCġk&Er#NUِ*%:nv]rC|YJHc\0q)C%CUu".FAZ³u@V9ehw?TiZkVd8}O0E)sD \hFXh}WBtcix1"|/IYg'%V;'1JOSPvQA1 0I:6+vlIJ)8oCr؇N/E4b"f7a Bгv>7­^] =|~Nڽ'VHA;FN@ "fQ?$BC`1K@ .92d*$*$1!d%Hu8:&IpsU7D.PnngohqaO82&A&p|=,}Hj}+IKujfY]&`663ӿsqF0${ZHdkH/jv5i*?46.,r[}y]@%q& usg VQ.m iKIq=5b&a hhKO(\˂ߡGT<ӻ<' '1G9'*-BCmegadepth-1.2.0/tests/test2.bam.all_jxs.tsv000066400000000000000000000005341420302544700206050ustar00rootroot000000000000007504219 chr1 18310 188572 0 36M170263N12M206N27M 1 7504219 chr1 188585 188790 0 36M170263N12M206N27M 1 19540155 chr10 4195113 4519182 0 10M324070N62M 0 19905142 chr10 4195113 4519182 0 10M324070N62M 0 27213900 chr10 4195113 4519182 0 10M324070N62M 0 19540155 chr10 4246673 4519182 1 10M272510N62M 0 19905142 chr10 4246673 4519182 1 10M272510N62M 0 megadepth-1.2.0/tests/test2.bam.jxs.tsv000066400000000000000000000004441420302544700177550ustar00rootroot00000000000000chr1 18274 0 170610 36M170263N12M206N27M 18310-188572,188585-188790 1 chr10 4195103 0 324182 10M324070N62M 4195113-4519182 0 chr10 4246663 1 -272622 10M272510N62M 4246673-4519182 0 chr10 4195103 0 324182 10M324070N62M 4195113-4519182 0 chr10 4246663 1 -272622 10M272510N62M 4246673-4519182 0 megadepth-1.2.0/tests/test3.auc.out.tsv000066400000000000000000000000631420302544700177670ustar00rootroot00000000000000ALL_READS_ALL_BASES 675 UNIQUE_READS_ALL_BASES 600 megadepth-1.2.0/tests/test3.bam000066400000000000000000000070771420302544700163510ustar00rootroot00000000000000BC Y}]E?vK@|aޙwfjB[ZJqw[vYݕ}-o߶[ `DTj#!% M(ABEc0jb0}3ݏ_93wΜs[{yK eԮљf5;ߨkC.kQ8!wb8 X8,v  J"yNC& t* b@daĄBf8q:tbZ<WcY(d("B?`q̘YKafc ZH! :ʬHdJ,1<_?&P ucxoXa:YGv"00L*dɜɕI`ӭjcYA&S99ΛJj(2b,ȉ Z_ ɉª*D0/)ϽX_U2s6\!9+?c,[A.qvݰ#Pb;DWYet~{@J 9[H,.B8r<"\d},ȝ?XayV qXAt!ⰰGgAZYBp4(¡A<͐w>OwkO0C{[^%LC ~ !]C[p ')=C/)>LC4G~qĠP*$aGi|EF!+(H'QH@}e45$!0D7|_x@莱>޿ !־ :av+ip }wh^.}, Uة|N[m ` %tXgP!c]2 Psq4,BNa"=ĕ^Ս\'0U$NVPc>xZĞ> 0>|Cuo^i aiR)о?듹@.cY770ʳ$W5Ow89 ;ɥpTP|0W-p_e;}ӧy\_6}EP1+wӐHS8SQs #TWE 8Uڭu27i. nRN^|C R! 
}LLQZqcG`4UÏ@s 2BK#(Tqc7rx\ZW)ϰ8Ά<йWR$|g{:wܹmt-':=͇0NzW%?V*MZˍlR]ߘUgϗϵDE'?5ڶVedhW֦zs^if?F =ԮgW2cl1;Y7:%._-lb]/l4Z퓻C>mWfKk7kM5ۗW516m9.hԫƗ ՚*ɝ-*[h,5[mc9#P1h^6n^nז*Ƿu,;U&W53szoTOt1Ӟo6v檍xanы-2-;Օ*zz]a^}X]Zx0v ./4uPeb.LBھ٤f[21'MUgX)T.g1a'˷ɓpa'Do7㧺N7'^B=!_! KA#s1} ׭u4|󶞒b)&oz ƏTӧμIܫk;>tZ>3?>^h}Ϲc;w%}U{dLNgu̐~yo ^S庲v ~Ϟܽy~{xftJ'ȃniI)]pgY;ߠWIa0 k?ݛ`J) ƟIyL=iSt2 /-™&5>=Iq0äWijK~&JzeLtxFxfCGLNWkSL-85ҿ~ӛ~&|;הd{8(҉2=T3~G@?7<3${W~Gnxg iKD:pngtbߴC:=_`6G7[G50lQ-o"خ-RLP3R)ݶԞ!X#S&T-h0e ]4?^֏o}^LY?.w*]z 74܎ok`2`kzʀ%e虲d8F2TN@ 94gru^ſxNo H" ճv-9hX{o+I̯V@/ /z&#*z>l.,O2@ !S)ڞC@>PE=4wo Bko࠽FIG.{̍4Pm{ ۾J_čȔ00e{lo^dʆ ;yc.J3`%g ɒӁIL  `#` ;$}@cs) -n)Gr# 4@6?G߄iXSO.]} $P lT~Х vBiP' ]Y;0cE ?([ /ԊahX▒?LY+v_#G ſSٵaа\P^܎-cYR](o'W`=/ K>Y=x5^=k,H3b%˽4@FaO01BC[qbdHfea M M6008 :z8:J8::)z8 (8 uH *I(:65I8 :5 pfu27eXp6Hjl4@:220XZX x@r*'Zu5[  M- MAW::sMss%\?`=r@'\|nu حffϭQVjn%T u(0$gUBCmegadepth-1.2.0/tests/test3.out.tsv000066400000000000000000000123561420302544700172300ustar00rootroot00000000000000chr1 0 100990161 0 chr1 100990161 100990236 9 chr1 100990236 248956422 0 chr2 0 242193529 0 chr3 0 198295559 0 chr4 0 190214555 0 chr5 0 181538259 0 chr6 0 170805979 0 chr7 0 159345973 0 chr8 0 145138636 0 chr9 0 138394717 0 chr10 0 133797422 0 chr11 0 135086622 0 chr12 0 133275309 0 chr13 0 114364328 0 chr14 0 107043718 0 chr15 0 101991189 0 chr16 0 90338345 0 chr17 0 83257441 0 chr18 0 80373285 0 chr19 0 58617616 0 chr20 0 64444167 0 chr21 0 46709983 0 chr22 0 50818468 0 chrX 0 156040895 0 chrY 0 57227415 0 chrM 0 16569 0 chr1_KI270706v1_random 0 175055 0 chr1_KI270707v1_random 0 32032 0 chr1_KI270708v1_random 0 127682 0 chr1_KI270709v1_random 0 66860 0 chr1_KI270710v1_random 0 40176 0 chr1_KI270711v1_random 0 42210 0 chr1_KI270712v1_random 0 176043 0 chr1_KI270713v1_random 0 40745 0 chr1_KI270714v1_random 0 41717 0 chr2_KI270715v1_random 0 161471 0 chr2_KI270716v1_random 0 153799 0 chr3_GL000221v1_random 0 155397 0 chr4_GL000008v2_random 0 209709 0 chr5_GL000208v1_random 0 92689 0 chr9_KI270717v1_random 0 
40062 0 chr9_KI270718v1_random 0 38054 0 chr9_KI270719v1_random 0 176845 0 chr9_KI270720v1_random 0 39050 0 chr11_KI270721v1_random 0 100316 0 chr14_GL000009v2_random 0 201709 0 chr14_GL000225v1_random 0 211173 0 chr14_KI270722v1_random 0 194050 0 chr14_GL000194v1_random 0 191469 0 chr14_KI270723v1_random 0 38115 0 chr14_KI270724v1_random 0 39555 0 chr14_KI270725v1_random 0 172810 0 chr14_KI270726v1_random 0 43739 0 chr15_KI270727v1_random 0 448248 0 chr16_KI270728v1_random 0 1872759 0 chr17_GL000205v2_random 0 185591 0 chr17_KI270729v1_random 0 280839 0 chr17_KI270730v1_random 0 112551 0 chr22_KI270731v1_random 0 150754 0 chr22_KI270732v1_random 0 41543 0 chr22_KI270733v1_random 0 179772 0 chr22_KI270734v1_random 0 165050 0 chr22_KI270735v1_random 0 42811 0 chr22_KI270736v1_random 0 181920 0 chr22_KI270737v1_random 0 103838 0 chr22_KI270738v1_random 0 99375 0 chr22_KI270739v1_random 0 73985 0 chrY_KI270740v1_random 0 37240 0 chrUn_KI270302v1 0 2274 0 chrUn_KI270304v1 0 2165 0 chrUn_KI270303v1 0 1942 0 chrUn_KI270305v1 0 1472 0 chrUn_KI270322v1 0 21476 0 chrUn_KI270320v1 0 4416 0 chrUn_KI270310v1 0 1201 0 chrUn_KI270316v1 0 1444 0 chrUn_KI270315v1 0 2276 0 chrUn_KI270312v1 0 998 0 chrUn_KI270311v1 0 12399 0 chrUn_KI270317v1 0 37690 0 chrUn_KI270412v1 0 1179 0 chrUn_KI270411v1 0 2646 0 chrUn_KI270414v1 0 2489 0 chrUn_KI270419v1 0 1029 0 chrUn_KI270418v1 0 2145 0 chrUn_KI270420v1 0 2321 0 chrUn_KI270424v1 0 2140 0 chrUn_KI270417v1 0 2043 0 chrUn_KI270422v1 0 1445 0 chrUn_KI270423v1 0 981 0 chrUn_KI270425v1 0 1884 0 chrUn_KI270429v1 0 1361 0 chrUn_KI270442v1 0 392061 0 chrUn_KI270466v1 0 1233 0 chrUn_KI270465v1 0 1774 0 chrUn_KI270467v1 0 3920 0 chrUn_KI270435v1 0 92983 0 chrUn_KI270438v1 0 112505 0 chrUn_KI270468v1 0 4055 0 chrUn_KI270510v1 0 2415 0 chrUn_KI270509v1 0 2318 0 chrUn_KI270518v1 0 2186 0 chrUn_KI270508v1 0 1951 0 chrUn_KI270516v1 0 1300 0 chrUn_KI270512v1 0 22689 0 chrUn_KI270519v1 0 138126 0 chrUn_KI270522v1 0 5674 0 chrUn_KI270511v1 0 8127 0 
chrUn_KI270515v1 0 6361 0 chrUn_KI270507v1 0 5353 0 chrUn_KI270517v1 0 3253 0 chrUn_KI270529v1 0 1899 0 chrUn_KI270528v1 0 2983 0 chrUn_KI270530v1 0 2168 0 chrUn_KI270539v1 0 993 0 chrUn_KI270538v1 0 91309 0 chrUn_KI270544v1 0 1202 0 chrUn_KI270548v1 0 1599 0 chrUn_KI270583v1 0 1400 0 chrUn_KI270587v1 0 2969 0 chrUn_KI270580v1 0 1553 0 chrUn_KI270581v1 0 7046 0 chrUn_KI270579v1 0 31033 0 chrUn_KI270589v1 0 44474 0 chrUn_KI270590v1 0 4685 0 chrUn_KI270584v1 0 4513 0 chrUn_KI270582v1 0 6504 0 chrUn_KI270588v1 0 6158 0 chrUn_KI270593v1 0 3041 0 chrUn_KI270591v1 0 5796 0 chrUn_KI270330v1 0 1652 0 chrUn_KI270329v1 0 1040 0 chrUn_KI270334v1 0 1368 0 chrUn_KI270333v1 0 2699 0 chrUn_KI270335v1 0 1048 0 chrUn_KI270338v1 0 1428 0 chrUn_KI270340v1 0 1428 0 chrUn_KI270336v1 0 1026 0 chrUn_KI270337v1 0 1121 0 chrUn_KI270363v1 0 1803 0 chrUn_KI270364v1 0 2855 0 chrUn_KI270362v1 0 3530 0 chrUn_KI270366v1 0 8320 0 chrUn_KI270378v1 0 1048 0 chrUn_KI270379v1 0 1045 0 chrUn_KI270389v1 0 1298 0 chrUn_KI270390v1 0 2387 0 chrUn_KI270387v1 0 1537 0 chrUn_KI270395v1 0 1143 0 chrUn_KI270396v1 0 1880 0 chrUn_KI270388v1 0 1216 0 chrUn_KI270394v1 0 970 0 chrUn_KI270386v1 0 1788 0 chrUn_KI270391v1 0 1484 0 chrUn_KI270383v1 0 1750 0 chrUn_KI270393v1 0 1308 0 chrUn_KI270384v1 0 1658 0 chrUn_KI270392v1 0 971 0 chrUn_KI270381v1 0 1930 0 chrUn_KI270385v1 0 990 0 chrUn_KI270382v1 0 4215 0 chrUn_KI270376v1 0 1136 0 chrUn_KI270374v1 0 2656 0 chrUn_KI270372v1 0 1650 0 chrUn_KI270373v1 0 1451 0 chrUn_KI270375v1 0 2378 0 chrUn_KI270371v1 0 2805 0 chrUn_KI270448v1 0 7992 0 chrUn_KI270521v1 0 7642 0 chrUn_GL000195v1 0 182896 0 chrUn_GL000219v1 0 179198 0 chrUn_GL000220v1 0 161802 0 chrUn_GL000224v1 0 179693 0 chrUn_KI270741v1 0 157432 0 chrUn_GL000226v1 0 15008 0 chrUn_GL000213v1 0 164239 0 chrUn_KI270743v1 0 210658 0 chrUn_KI270744v1 0 168472 0 chrUn_KI270745v1 0 41891 0 chrUn_KI270746v1 0 66486 0 chrUn_KI270747v1 0 198735 0 chrUn_KI270748v1 0 93321 0 chrUn_KI270749v1 0 158759 0 chrUn_KI270750v1 0 148850 
0 chrUn_KI270751v1 0 150742 0 chrUn_KI270752v1 0 27745 0 chrUn_KI270753v1 0 62944 0 chrUn_KI270754v1 0 40191 0 chrUn_KI270755v1 0 36723 0 chrUn_KI270756v1 0 79590 0 chrUn_KI270757v1 0 71251 0 chrUn_GL000214v1 0 137718 0 chrUn_KI270742v1 0 186739 0 chrUn_GL000216v2 0 176608 0 chrUn_GL000218v1 0 161147 0 chrEBV 0 171823 0 ALL_READS_ALL_BASES 675 megadepth-1.2.0/tests/test_exons.bed000066400000000000000000000002671420302544700174670ustar00rootroot00000000000000chr10 3104118 3104229 chr10 4358477 4359470 chr10 8722218 8725760 chr10 8729327 8730436 chr10 8756628 8756761 chr10 8780518 8780620 chr10 130592156 130592705 GL000219.1 150000 170000 megadepth-1.2.0/tests/test_noprefix.bam000066400000000000000000000062511420302544700201710ustar00rootroot00000000000000BC`T[0M*Mx8 I~KCHx@ZbSl;GT `xhU$dsνΛ_4]Dl(AH$}AFsn0i!UY23 +5bYBc"Y՘-} 6&6lUAv}÷t F٢0PYEnrڛ4)uVB5dݷHM"ʍϫ хϱWzZ!,yBᒎ DOV\taȐ%JrxU[2C-zA;Y| |Bna9zzqnU+ՙ+Z>\:_Ya'˜dDqzv(αl)p`r 3x@8+Mtd:A(hJRINDcu;S:MLqa1S߯dl-;>쇹LL9ැl`q8:RV?OjS9LfŜ,-ׄ+>gWCJlxS:.f4!7XyS/r3v}JR4^w[6)BC+ ՛]V춻<@l,ay|qmjQ5KM*lBB50%iNS9gqii3‚h-.^Z8ThcTG@&h:zJ=e݋Rу"x9 b).x1NO GYP`.\ |h0jF)A>>pЅO~wWHDxQC^c/TdA~|u4VVr][~kp9^M!옆M )$ ؒsp$<Ȃ(u42&@GAЎjbC*}aThҰDAaavnĨEL mر,raS \萵*=xUB,|X^;bzt&dyBv]I.Y^A3W1tΆmQ+(~~÷;1-8: WV y΍/N|WN\JD +x H|JgLz Y`]-Yx04'$X׋i>Pt4~Vf _1 >6]l(;=ETjaoeщ9JXEYJb6}n1{=5`S%wwX %6։IMJal3*`ݷ#},_*Rػm31hGahtto6%tiXWcXHt!7|6J{ G6Mr!>`& -L`mü g#KTk/5p#&1tXA8w3,D1Awص"v2$v)`(,$ lDZ$aoa€c)٨lu d@=N3Zfps8m,!b7 @:c=~;ua  T+aDsER DMgL~[%V( H -ѨЅ=JIT>Ѝ'2@7{_r. ׽dJ'Хzx+n㼰"L7a,ɼ,JȜ-pZ\ĘL%pya Փ\!0$7ƹ_[2\?0iĆı+$+O,!vifپBoM'A-ols u)mlݷn'bMB@J,y/+~Ț@i sT5S5RAtb{(I>JGbeCi0J?ΆzH~w:PSV3vڙh 'H/@tsA\I$ ^TԖ?-ߌ-4J.֒W`#yfk|gZDЁV/G.E념؁ ,8C<3q4BeWbnWMg0c:v#v*j#+6:@c ;Co4; ڶ"uPzaAndAĨ2|# "mk{~ϰqB~f+M1p~t\lC%䔈oxQdw4l1 ZL:Xh~ж.y0hAIڥ¾A?6:Bv RipMwhSm#N3)v:9胩؀2Z3r|"n CRcESz{Q\0uk.=c0`(rϲ y?مl |Zw5:3qgؐ7 ! 
jFkxi&:iu>;Ҧ@ vk= hkB,LYJ[X>J6&"*jYkJUIL܁4ht?d{ў.xØY&<;j t)A$A$Ax z.IRX' :_wM3~[LaC<-r{y=Q(Ux)׻+ol-}^gr qAx~r.~WҹΘ@Oe=Kly}`LeЭQ+]Tk0CBCmegadepth-1.2.0/tests/test_noprefix.bam.bai000066400000000000000000000110601420302544700207150ustar00rootroot00000000000000BAIJaaSaa aaaaaaaaaaaQ"a S<a"aJa ]LaaMa<aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a"a"a"a"a"a"a"a"a"a"a"a"a"a"a"a"a"a"a"a"al#al#al#al#al#a!$amegadepth-1.2.0/tests/test_run_out.txt000066400000000000000000000061251420302544700201120ustar00rootroot00000000000000+ export LD_LIBRARY_PATH=/data7/bamcount/libBigWig:/data7/bamcount/htslib:/data7/bamcount/libBigWig:/data7/bamcount/htslib:/usr/local/cuda-8.0/lib64:/home/cwilks/zstd-1.3.3/lib + LD_LIBRARY_PATH=/data7/bamcount/libBigWig:/data7/bamcount/htslib:/data7/bamcount/libBigWig:/data7/bamcount/htslib:/usr/local/cuda-8.0/lib64:/home/cwilks/zstd-1.3.3/lib + MD=../mosdepth + BT=/home/cwilks/bwtool_mine2/bwtool + B2B='python3 ../bam2bigwig.py' + KB2B=/data/kent_tools/bigWigToBedGraph + WT=wiggletools + EXONS=/data7/bamcount/exons.bed + EXONS=./test_exons.bed + BAM=test.bam + echo -n '' + echo -n '' + for t in default unique ++ perl -e '$t=default; print "0" if($t eq "default"); print "10" if($t eq "unique");' + qarg=0 + ../mosdepth -F260 -t 4 -Q0 test.bam.default 
test.bam real 0m0.836s user 0m0.498s sys 0m0.315s + cat /dev/fd/63 /dev/fd/62 ++ samtools view -H test.bam + python3 ../bam2bigwig.py test.bam.default.per-base.bed.gz.bw ++ zcat test.bam.default.per-base.bed.gz real 0m0.102s user 0m0.107s sys 0m0.435s + cut -f1-3,10 + /home/cwilks/bwtool_mine2/bwtool summary ./test_exons.bed test.bam.default.per-base.bed.gz.bw /dev/stdout -fill=0 -with-sum -keep-bed -decimals=0 + sort -k1,1 -k2,2n -k3,3n real 0m0.003s user 0m0.000s sys 0m0.004s + /data/kent_tools/bigWigToBedGraph test.bam.default.per-base.bed.gz.bw test.bam.default.per-base.bed.gz.bw.sums.bed.bg + wiggletools AUC test.bam.default.per-base.bed.gz.bw + perl -ne 'BEGIN { $t=default; } chomp; $f=$_; $f=~s/\.0+$//; print "ALL_READS_ALL_BASES\t$f\n" if($t eq "default"); print "UNIQUE_READS_ALL_BASES\t$f\n" if($t eq "unique");' + cat test.bam.default.per-base.bed.gz.bw.sums.bed + perl -ne 'BEGIN { $t=default; } chomp; ($c,$s,$e,$v)=split(/\t/,$_); $f+=$v; END { print "ALL_READS_ANNOTATED_BASES\t$f\n" if($t eq "default"); print "UNIQUE_READS_ANNOTATED_BASES\t$f\n" if($t eq "unique"); }' + for t in default unique ++ perl -e '$t=unique; print "0" if($t eq "default"); print "10" if($t eq "unique");' + qarg=10 + ../mosdepth -F260 -t 4 -Q10 test.bam.unique test.bam real 0m0.799s user 0m0.484s sys 0m0.294s + cat /dev/fd/63 /dev/fd/62 ++ samtools view -H test.bam + python3 ../bam2bigwig.py test.bam.unique.per-base.bed.gz.bw ++ zcat test.bam.unique.per-base.bed.gz real 0m0.108s user 0m0.125s sys 0m0.434s + /home/cwilks/bwtool_mine2/bwtool summary ./test_exons.bed test.bam.unique.per-base.bed.gz.bw /dev/stdout -fill=0 -with-sum -keep-bed -decimals=0 + cut -f1-3,10 + sort -k1,1 -k2,2n -k3,3n real 0m0.003s user 0m0.002s sys 0m0.002s + /data/kent_tools/bigWigToBedGraph test.bam.unique.per-base.bed.gz.bw test.bam.unique.per-base.bed.gz.bw.sums.bed.bg + wiggletools AUC test.bam.unique.per-base.bed.gz.bw + perl -ne 'BEGIN { $t=unique; } chomp; $f=$_; $f=~s/\.0+$//; print 
"ALL_READS_ALL_BASES\t$f\n" if($t eq "default"); print "UNIQUE_READS_ALL_BASES\t$f\n" if($t eq "unique");' + cat test.bam.unique.per-base.bed.gz.bw.sums.bed + perl -ne 'BEGIN { $t=unique; } chomp; ($c,$s,$e,$v)=split(/\t/,$_); $f+=$v; END { print "ALL_READS_ANNOTATED_BASES\t$f\n" if($t eq "default"); print "UNIQUE_READS_ANNOTATED_BASES\t$f\n" if($t eq "unique"); }' + cat test.bam.per-base.bed.gz.bw.aucs megadepth-1.2.0/tests/testbw1.annot_auc000066400000000000000000000000351420302544700200730ustar00rootroot00000000000000AUC_ANNOTATED_BASES 3864.000 megadepth-1.2.0/tests/testbw1.bed000066400000000000000000000015631420302544700166650ustar00rootroot00000000000000GL000219.1 0 168544 GL000219.1 168544 168619 GL000219.1 168619 168621 GL000219.1 168621 179198 chr10 0 4358517 chr10 4358517 4358578 chr10 4358578 4359039 chr10 4359039 4359045 chr10 4359045 4359059 chr10 4359059 4359067 chr10 4359067 4359086 chr10 4359086 4359122 chr10 4359122 4359131 chr10 4359131 4359137 chr10 4359137 4359156 chr10 4359156 4359159 chr10 4359159 4359185 chr10 4359185 4359188 chr10 4359188 4581019 chr10 4581019 4581030 chr10 4581030 8458609 chr10 8458609 8458622 chr10 8458622 8722264 chr10 8722264 8722314 chr10 8722314 8722446 chr10 8722446 8722507 chr10 8722507 8756699 chr10 8756699 8756714 chr10 8756714 8756761 chr10 8756761 8778558 chr10 8778558 8778617 chr10 8778617 8780518 chr10 8780518 8780528 chr10 8780528 8780543 chr10 8780543 8780567 chr10 8780567 8848720 chr10 8848720 8848742 chr10 8848742 8870679 chr10 8870679 8870690 chr10 8870690 130694993 megadepth-1.2.0/tests/testbw1.bed.out.tsv000066400000000000000000000021361420302544700203030ustar00rootroot00000000000000GL000219.1 0 168544 0.00 GL000219.1 168544 168619 150.00 GL000219.1 168619 168621 2.00 GL000219.1 168621 179198 0.00 chr10 0 4358517 0.00 chr10 4358517 4358578 61.00 chr10 4358578 4359039 0.00 chr10 4359039 4359045 54.00 chr10 4359045 4359059 140.00 chr10 4359059 4359067 104.00 chr10 4359067 4359086 266.00 chr10 4359086 
4359122 576.00 chr10 4359122 4359131 153.00 chr10 4359131 4359137 84.00 chr10 4359137 4359156 247.00 chr10 4359156 4359159 12.00 chr10 4359159 4359185 78.00 chr10 4359185 4359188 6.00 chr10 4359188 4581019 0.00 chr10 4581019 4581030 11.00 chr10 4581030 8458609 0.00 chr10 8458609 8458622 13.00 chr10 8458622 8722264 0.00 chr10 8722264 8722314 50.00 chr10 8722314 8722446 0.00 chr10 8722446 8722507 61.00 chr10 8722507 8756699 0.00 chr10 8756699 8756714 90.00 chr10 8756714 8756761 940.00 chr10 8756761 8778558 0.00 chr10 8778558 8778617 59.00 chr10 8778617 8780518 0.00 chr10 8780518 8780528 200.00 chr10 8780528 8780543 210.00 chr10 8780543 8780567 264.00 chr10 8780567 8848720 0.00 chr10 8848720 8848742 22.00 chr10 8848742 8870679 0.00 chr10 8870679 8870690 11.00 chr10 8870690 130694993 0.00 megadepth-1.2.0/tests/testbw1.total_auc000066400000000000000000000000271420302544700201000ustar00rootroot00000000000000AUC_ALL_BASES 3864.000 megadepth-1.2.0/tests/testbw2.annot_auc000066400000000000000000000000351420302544700200740ustar00rootroot00000000000000AUC_ANNOTATED_BASES 1277.000 megadepth-1.2.0/tests/testbw2.bed000066400000000000000000000001201420302544700166520ustar00rootroot00000000000000chr10 0 10 chr10 8756697 8756762 chr10 4359156 4359188 GL000219.1 168500 168620 megadepth-1.2.0/tests/testbw2.bed.max000066400000000000000000000001451420302544700174450ustar00rootroot00000000000000chr10 0 10 0.00 chr10 8756697 8756762 20.00 chr10 4359156 4359188 4.00 GL000219.1 168500 168620 2.00 megadepth-1.2.0/tests/testbw2.bed.mean000066400000000000000000000001451420302544700176000ustar00rootroot00000000000000chr10 0 10 0.00 chr10 8756697 8756762 15.85 chr10 4359156 4359188 3.00 GL000219.1 168500 168620 1.26 megadepth-1.2.0/tests/testbw2.bed.min000066400000000000000000000001441420302544700174420ustar00rootroot00000000000000chr10 0 10 0.00 chr10 8756697 8756762 0.00 chr10 4359156 4359188 2.00 GL000219.1 168500 168620 0.00 
megadepth-1.2.0/tests/testbw2.bed.out.tsv000066400000000000000000000001521420302544700203000ustar00rootroot00000000000000chr10 0 10 0.00 chr10 8756697 8756762 1030.00 chr10 4359156 4359188 96.00 GL000219.1 168500 168620 151.00 megadepth-1.2.0/zlib_ci/000077500000000000000000000000001420302544700150645ustar00rootroot00000000000000