pyBigWig-0.3.18/000077500000000000000000000000001400776027600133355ustar00rootroot00000000000000pyBigWig-0.3.18/.environmentLinux.yaml000066400000000000000000000002301400776027600176560ustar00rootroot00000000000000name: foo channels: - conda-forge - bioconda - default dependencies: - gcc_linux-64 - curl - zlib - python 3.8 - pip - numpy - nose pyBigWig-0.3.18/.github/000077500000000000000000000000001400776027600146755ustar00rootroot00000000000000pyBigWig-0.3.18/.github/workflows/000077500000000000000000000000001400776027600167325ustar00rootroot00000000000000pyBigWig-0.3.18/.github/workflows/build.yml000066400000000000000000000007151400776027600205570ustar00rootroot00000000000000on: pull_request jobs: testLinux: name: TestLinux runs-on: "ubuntu-latest" defaults: run: shell: bash -l {0} steps: - uses: actions/checkout@v2 - uses: conda-incubator/setup-miniconda@v2 with: activate-environment: foo environment-file: .environmentLinux.yaml python-version: 3.8 auto-activate-base: false - run: | pip install . 
nosetests -sv pyBigWig-0.3.18/.github/workflows/pypi.yml000066400000000000000000000025051400776027600204400ustar00rootroot00000000000000name: pypi on: [push] jobs: pypi: name: upload to pypi runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - name: Setup conda if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') run: | curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -o miniconda.sh bash miniconda.sh -b -p $HOME/miniconda export PATH="$HOME/miniconda/bin:$PATH" hash -r conda config --set always_yes yes --set changeps1 no - name: create env if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') run: | export PATH=$HOME/miniconda/bin:$PATH conda create -n foo -q --yes -c conda-forge -c bioconda python=3.7 twine numpy libcurl curl zlib - name: sdist if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') run: | export PATH=$HOME/miniconda/bin:$PATH source activate foo rm -f dist/* python setup.py sdist - name: upload if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') env: TWINE_USERNAME: "__token__" TWINE_PASSWORD: ${{ secrets.pypi_password }} run: | export PATH=$HOME/miniconda/bin:$PATH source activate foo twine upload dist/* pyBigWig-0.3.18/.gitignore000066400000000000000000000013551400776027600153310ustar00rootroot00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover # Translations *.mo *.pot # Django stuff: *.log # Sphinx documentation docs/_build/ # PyBuilder target/ *.o #./setup.py sdist creates this MANIFEST *.swp pyBigWig-0.3.18/.gitmodules000066400000000000000000000000001400776027600155000ustar00rootroot00000000000000pyBigWig-0.3.18/LICENSE.txt000066400000000000000000000020661400776027600151640ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2015 Devon Ryan Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
pyBigWig-0.3.18/MANIFEST.in000066400000000000000000000001661400776027600150760ustar00rootroot00000000000000include LICENSE.txt README.md *.c *.h setup.py include libBigWig/* include pyBigWigTest/* exclude pyBigWig.egg-info/* pyBigWig-0.3.18/README.md000066400000000000000000000444761400776027600146330ustar00rootroot00000000000000[![PyPI version](https://badge.fury.io/py/pyBigWig.svg)](https://badge.fury.io/py/pyBigWig) [![Travis-CI status](https://travis-ci.org/deeptools/pyBigWig.svg?branch=master)](https://travis-ci.org/dpryan79/pyBigWig.svg?branch=master) [![bioconda-badge](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io) [![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.45238.svg)](http://dx.doi.org/10.5281/zenodo.45238) # pyBigWig A python extension, written in C, for quick access to bigBed files and access to and creation of bigWig files. This extension uses [libBigWig](https://github.com/dpryan79/libBigWig) for local and remote file access. 
Table of Contents ================= * [Installation](#installation) * [Requirements](#requirements) * [Usage](#usage) * [Load the extension](#load-the-extension) * [Open a bigWig or bigBed file](#open-a-bigwig-or-bigbed-file) * [Determining the file type](#determining-the-file-type) * [Access the list of chromosomes and their lengths](#access-the-list-of-chromosomes-and-their-lengths) * [Print the header](#print-the-header) * [Compute summary information on a range](#compute-summary-information-on-a-range) * [A note on statistics and zoom levels](#a-note-on-statistics-and-zoom-levels) * [Retrieve values for individual bases in a range](#retrieve-values-for-individual-bases-in-a-range) * [Retrieve all intervals in a range](#retrieve-all-intervals-in-a-range) * [Retrieving bigBed entries](#retrieving-bigbed-entries) * [Add a header to a bigWig file](#add-a-header-to-a-bigwig-file) * [Adding entries to a bigWig file](#adding-entries-to-a-bigwig-file) * [Close a bigWig or bigBed file](#close-a-bigwig-or-bigbed-file) * [Numpy](#numpy) * [Remote file access](#remote-file-access) * [Empty files](#empty-files) * [A note on coordinates](#a-note-on-coordinates) * [Galaxy](#galaxy) # Installation You can install this extension from PyPI with: pip install pyBigWig or with conda conda install pybigwig -c conda-forge -c bioconda ## Requirements The following non-python requirements must be installed: - libcurl (and the `curl-config` config) - zlib The headers and libraries for these are required. # Usage Basic usage is as follows: ## Load the extension >>> import pyBigWig ## Open a bigWig or bigBed file This will work if your working directory is the pyBigWig source code directory. >>> bw = pyBigWig.open("test/test.bw") Note that if the file doesn't exist you'll see an error message and `None` will be returned. By default, all files are opened for reading and not writing.
You can alter this by passing a mode containing `w`: >>> bw = pyBigWig.open("test/output.bw", "w") Note that a file opened for writing can't be queried for its intervals or statistics, it can *only* be written to. If you open a file for writing then you will next need to add a header (see the section on this below). Local and remote bigBed read access is also supported: >>> bb = pyBigWig.open("https://www.encodeproject.org/files/ENCFF001JBR/@@download/ENCFF001JBR.bigBed") While you can specify a mode for bigBed files, it is ignored. The object returned by `pyBigWig.open()` is the same regardless of whether you're opening a bigWig or bigBed file. ## Determining the file type Since bigWig and bigBed files can both be opened, it may be necessary to determine whether a given `bigWigFile` object points to a bigWig or bigBed file. To that end, one can use the `isBigWig()` and `isBigBed()` functions: >>> bw = pyBigWig.open("test/test.bw") >>> bw.isBigWig() True >>> bw.isBigBed() False ## Access the list of chromosomes and their lengths `bigWigFile` objects contain a dictionary holding the chromosome lengths, which can be accessed with the `chroms()` accessor. >>> bw.chroms() dict_proxy({'1': 195471971L, '10': 130694993L}) You can also directly query a particular chromosome. >>> bw.chroms("1") 195471971L The lengths are stored as the "long" integer type, which is why there's an `L` suffix. If you specify a non-existent chromosome then nothing is output. >>> bw.chroms("c") >>> ## Print the header It's sometimes useful to print a bigWig's header. This is presented here as a python dictionary containing: the version (typically `4`), the number of zoom levels (`nLevels`), the number of bases described (`nBasesCovered`), the minimum value (`minVal`), the maximum value (`maxVal`), the sum of all values (`sumData`), and the sum of all squared values (`sumSquared`). The last two of these are needed for determining the mean and standard deviation.
>>> bw.header() {'maxVal': 2L, 'sumData': 272L, 'minVal': 0L, 'version': 4L, 'sumSquared': 500L, 'nLevels': 1L, 'nBasesCovered': 154L} Note that this is also possible for bigBed files and the same dictionary keys will be present. Entries such as `maxVal`, `sumData`, `minVal`, and `sumSquared` are then largely not meaningful. ## Compute summary information on a range bigWig files are used to store values associated with positions and ranges of them. Typically we want to quickly access the average value over a range, which is very simple: >>> bw.stats("1", 0, 3) [0.2000000054637591] Suppose instead of the mean value, we instead wanted the maximum value: >>> bw.stats("1", 0, 3, type="max") [0.30000001192092896] Other options are "min" (the minimum value), "coverage" (the fraction of bases covered), and "std" (the standard deviation of the values). It's often the case that we would instead like to compute values of some number of evenly spaced bins in a given interval, which is also simple: >>> bw.stats("1",99, 200, type="max", nBins=2) [1.399999976158142, 1.5] `nBins` defaults to 1, just as `type` defaults to `mean`. If the start and end positions are omitted then the entire chromosome is used: >>> bw.stats("1") [1.3351851569281683] ### A note on statistics and zoom levels > A note to the lay reader: This section is rather technical and included only for the sake of completeness. The summary is that if your needs require exact mean/max/etc. summary values for an interval or intervals and that a small trade-off in speed is acceptable, that you should use the `exact=True` option in the `stats()` function. By default, there are some unintuitive aspects to computing statistics on ranges in a bigWig file. The bigWig format was originally created in the context of genome browsers. 
There, computing exact summary statistics for a given interval is less important than quickly being able to compute an approximate statistic (after all, browsers need to be able to quickly display a number of contiguous intervals and support scrolling/zooming). Because of this, bigWig files contain not only interval-value associations, but also `sum of values`/`sum of squared values`/`minimum value`/`maximum value`/`number of bases covered` for equally sized bins of various sizes. These different sizes are referred to as "zoom levels". The smallest zoom level has bins that are 16 times the mean interval size in the file and each subsequent zoom level has bins 4 times larger than the previous. This methodology is used in Kent's tools and, therefore, likely used in almost every currently existing bigWig file. When a bigWig file is queried for a summary statistic, the size of the interval is used to determine whether to use a zoom level and, if so, which one. The optimal zoom level is that which has the largest bins no more than half the width of the desired interval. If no such zoom level exists, the original intervals are instead used for the calculation. For the sake of consistency with other tools, pyBigWig adopts this same methodology. However, since this is (A) unintuitive and (B) undesirable in some applications, pyBigWig enables computation of exact summary statistics regardless of the interval size (i.e., it allows ignoring the zoom levels). 
This was originally proposed [here](https://github.com/dpryan79/pyBigWig/issues/12) and an example is below: >>> import pyBigWig >>> from numpy import mean >>> bw = pyBigWig.open("http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/wgEncodeCrgMapabilityAlign75mer.bigWig") >>> bw.stats('chr1', 89294, 91629) [0.20120902053804418] >>> mean(bw.values('chr1', 89294, 91629)) 0.22213841940688142 >>> bw.stats('chr1', 89294, 91629, exact=True) [0.22213841940688142] ## Retrieve values for individual bases in a range While the `stats()` method **can** be used to retrieve the original values for each base (e.g., by setting `nBins` to the number of bases), it's preferable to instead use the `values()` accessor. >>> bw.values("1", 0, 3) [0.10000000149011612, 0.20000000298023224, 0.30000001192092896] The list produced will always contain one value for every base in the range specified. If a particular base has no associated value in the bigWig file then the returned value will be `nan`. >>> bw.values("1", 0, 4) [0.10000000149011612, 0.20000000298023224, 0.30000001192092896, nan] ## Retrieve all intervals in a range Sometimes it's convenient to retrieve all entries overlapping some range. This can be done with the `intervals()` function: >>> bw.intervals("1", 0, 3) ((0, 1, 0.10000000149011612), (1, 2, 0.20000000298023224), (2, 3, 0.30000001192092896)) What's returned is a list of tuples containing: the start position, the end position, and the value. Thus, the example above has values of `0.1`, `0.2`, and `0.3` at positions `0`, `1`, and `2`, respectively. If the start and end position are omitted then all intervals on the chromosome specified are returned: >>> bw.intervals("1") ((0, 1, 0.10000000149011612), (1, 2, 0.20000000298023224), (2, 3, 0.30000001192092896), (100, 150, 1.399999976158142), (150, 151, 1.5)) ## Retrieving bigBed entries As opposed to bigWig files, bigBed files hold entries, which are intervals with an associated string.
You can access these entries using the `entries()` function: >>> bb = pyBigWig.open("https://www.encodeproject.org/files/ENCFF001JBR/@@download/ENCFF001JBR.bigBed") >>> bb.entries('chr1', 10000000, 10020000) [(10009333, 10009640, '61035\t130\t-\t0.026\t0.42\t404'), (10014007, 10014289, '61047\t136\t-\t0.029\t0.42\t404'), (10014373, 10024307, '61048\t630\t-\t5.420\t0.00\t2672399')] The output is a list of entry tuples. The tuple elements are the `start` and `end` position of each entry, followed by its associated `string`. The string is returned exactly as it's held in the bigBed file, so parsing it is left to you. To determine what the various fields are in these strings, consult the SQL string: >>> bb.SQL() table RnaElements "BED6 + 3 scores for RNA Elements data" ( string chrom; "Reference sequence chromosome or scaffold" uint chromStart; "Start position in chromosome" uint chromEnd; "End position in chromosome" string name; "Name of item" uint score; "Normalized score from 0-1000" char[1] strand; "+ or - or . for unknown" float level; "Expression level such as RPKM or FPKM. Set to -1 for no data." float signif; "Statistical significance such as IDR. Set to -1 for no data." uint score2; "Additional measurement/count e.g. number of reads. Set to 0 for no data." ) Note that the first three entries in the SQL string are not part of the string. If you only need to know where entries are and not their associated values, you can save memory by additionally specifying `withString=False` in `entries()`: >>> bb.entries('chr1', 10000000, 10020000, withString=False) [(10009333, 10009640), (10014007, 10014289), (10014373, 10024307)] ## Add a header to a bigWig file If you've opened a file for writing then you'll need to give it a header before you can add any entries. The header contains all of the chromosomes, **in order**, and their sizes.
If your genome has two chromosomes, chr1 and chr2, of lengths 1 and 1.5 million bases, then the following would add an appropriate header: >>> bw.addHeader([("chr1", 1000000), ("chr2", 1500000)]) bigWig headers are case-sensitive, so `chr1` and `Chr1` are different. Likewise, `1` and `chr1` are not the same, so you can't mix Ensembl and UCSC chromosome names. After adding a header, you can then add entries. By default, up to 10 "zoom levels" are constructed for bigWig files. You can change this default number with the `maxZooms` optional argument. A common use of this is to create a bigWig file that simply holds intervals and no zoom levels: >>> bw.addHeader([("chr1", 1000000), ("chr2", 1500000)], maxZooms=0) If you set `maxZooms=0`, please note that IGV and many other tools WILL NOT WORK as they assume that at least one zoom level will be present. You are advised to use the default unless you do not expect the bigWig files to be used by other packages. ## Adding entries to a bigWig file Assuming you've opened a file for writing and added a header, you can then add entries. Note that the entries **must** be added in order, as bigWig files always contain ordered intervals. There are three formats that bigWig files can use internally to store entries. The most commonly observed format is identical to a [bedGraph](https://genome.ucsc.edu/goldenpath/help/bedgraph.html) file: chr1 0 100 0.0 chr1 100 120 1.0 chr1 125 126 200.0 These entries would be added as follows: >>> bw.addEntries(["chr1", "chr1", "chr1"], [0, 100, 125], ends=[5, 120, 126], values=[0.0, 1.0, 200.0]) Each entry occupies 12 bytes before compression. The second format uses a fixed span, but a variable step size between entries. These can be represented in a [wiggle](http://genome.ucsc.edu/goldenpath/help/wiggle.html) file as: variableStep chrom=chr1 span=20 500 -2.0 600 150.0 635 25.0 The above entries describe (1-based) positions 501-520, 601-620 and 636-655.
These would be added as follows: >>> bw.addEntries("chr1", [500, 600, 635], values=[-2.0, 150.0, 25.0], span=20) Each entry of this type occupies 8 bytes before compression. The final format uses a fixed step and span for each entry, corresponding to the fixedStep [wiggle format](http://genome.ucsc.edu/goldenpath/help/wiggle.html): fixedStep chrom=chr1 step=30 span=20 -5.0 -20.0 25.0 The above entries describe (1-based) bases 901-920, 931-950 and 961-980 and would be added as follows: >>> bw.addEntries("chr1", 900, values=[-5.0, -20.0, 25.0], span=20, step=30) Each entry of this type occupies 4 bytes. Note that pyBigWig will try to prevent you from adding entries in an incorrect order. This, however, requires additional overhead. Should that not be acceptable, you can simply specify `validate=False` when adding entries: >>> bw.addEntries(["chr1", "chr1", "chr1"], [100, 0, 125], ends=[120, 5, 126], values=[0.0, 1.0, 200.0], validate=False) You're obviously then responsible for ensuring that you **do not** add entries out of order. The resulting files would otherwise largely not be usable. ## Close a bigWig or bigBed file A file can be closed with a simple `bw.close()`, as is commonly done with other file types. For files opened for writing, closing a file writes any buffered entries to disk, constructs and writes the file index, and constructs zoom levels. Consequently, this can take a bit of time. # Numpy As of version 0.3.0, pyBigWig supports input of coordinates using numpy integers and vectors in some functions **if numpy was installed prior to installing pyBigWig**. You can determine if pyBigWig was installed with numpy support by checking the `numpy` accessor: >>> import pyBigWig >>> pyBigWig.numpy 1 If `pyBigWig.numpy` is `1`, then pyBigWig was compiled with numpy support.
This means that `addEntries()` can accept numpy coordinates: >>> import pyBigWig >>> import numpy as np >>> bw = pyBigWig.open("/tmp/delete.bw", "w") >>> bw.addHeader([("1", 1000)], maxZooms=0) >>> chroms = np.array(["1"] * 10) >>> starts = np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90], dtype=np.int64) >>> ends = np.array([5, 15, 25, 35, 45, 55, 65, 75, 85, 95], dtype=np.int64) >>> values0 = np.array(np.random.random_sample(10), dtype=np.float64) >>> bw.addEntries(chroms, starts, ends=ends, values=values0) >>> bw.close() Additionally, `values()` can directly output a numpy vector: >>> bw = pyBigWig.open("/tmp/delete.bw") >>> bw.values('1', 0, 10, numpy=True) [ 0.74336642 0.74336642 0.74336642 0.74336642 0.74336642 nan nan nan nan nan] >>> type(bw.values('1', 0, 10, numpy=True)) <type 'numpy.ndarray'> # Remote file access If you do not have curl installed, pyBigWig will be installed without the ability to access remote files. You can determine if you will be able to access remote files with `pyBigWig.remote`. If that returns 1, then you can access remote files. If it returns 0 then you can't. # Empty files As of version 0.3.5, pyBigWig is able to read and write bigWig files lacking entries. Please note that such files are generally not compatible with other programs, since there's no definition of how a bigWig file with no entries should look. For such a file, the `intervals()` accessor will return `None`, the `stats()` function will return a list of `None` of the desired length, and `values()` will return `[]` (an empty list). This should generally allow programs utilizing pyBigWig to continue without issue. For those wishing to mimic the functionality of pyBigWig/libBigWig in this regard, please note that it looks at the number of bases covered (as reported in the file header) to check for "empty" files. # A note on coordinates Wiggle, bigWig, and bigBed files use 0-based half-open coordinates, which are also used by this extension.
So to access the value for the first base on `chr1`, one would specify the starting position as `0` and the end position as `1`. Similarly, bases 100 to 115 would have a start of `99` and an end of `115`. This is simply for the sake of consistency with the underlying bigWig file and may change in the future. # Galaxy pyBigWig is also available as a package in [Galaxy](http://www.usegalaxy.org). You can find it in the toolshed and the [IUC](https://wiki.galaxyproject.org/IUC) is currently hosting the XML definition of this on [github](https://github.com/galaxyproject/tools-iuc/tree/master/packages/package_python_2_7_10_pybigwig_0_2_8). pyBigWig-0.3.18/pyBigWig.c000066400000000000000000001622361400776027600152340ustar00rootroot00000000000000#include #include #include "pyBigWig.h" #ifdef WITHNUMPY #include #include "numpy/npy_common.h" #include "numpy/halffloat.h" #include "numpy/ndarrayobject.h" #include "numpy/arrayscalars.h" int lsize = NPY_SIZEOF_LONG; //Raises an exception on error, which should be checked uint32_t getNumpyU32(PyArrayObject *obj, Py_ssize_t i) { int dtype; char *p; uint32_t o = 0; npy_intp stride; //Get the dtype dtype = PyArray_TYPE(obj); //Get the stride stride = PyArray_STRIDE(obj, 0); p = PyArray_BYTES(obj) + i*stride; switch(dtype) { case NPY_INT8: if(((int8_t *) p)[0] < 0) { PyErr_SetString(PyExc_RuntimeError, "Received an integer < 0!\n"); goto error; } o += ((int8_t *) p)[0]; break; case NPY_INT16: if(((int16_t *) p)[0] < 0) { PyErr_SetString(PyExc_RuntimeError, "Received an integer < 0!\n"); goto error; } o += ((int16_t *) p)[0]; break; case NPY_INT32: if(((int32_t *) p)[0] < 0) { PyErr_SetString(PyExc_RuntimeError, "Received an integer < 0!\n"); goto error; } o += ((int32_t *) p)[0]; break; case NPY_INT64: if(((int64_t *) p)[0] < 0) { PyErr_SetString(PyExc_RuntimeError, "Received an integer < 0!\n"); goto error; } o += ((int64_t *) p)[0]; break; case NPY_UINT8: o += ((uint8_t *) p)[0]; break; case NPY_UINT16: o += ((uint16_t *) p)[0]; 
break; case NPY_UINT32: o += ((uint32_t *) p)[0]; break; case NPY_UINT64: if(((uint64_t *) p)[0] > (uint32_t) -1) { PyErr_SetString(PyExc_RuntimeError, "Received an integer larger than possible for a 32bit unsigned integer!\n"); goto error; } o += ((uint64_t *) p)[0]; break; default: PyErr_SetString(PyExc_RuntimeError, "Received unknown data type for conversion to uint32_t!\n"); goto error; break; } return o; error: return 0; }; long getNumpyL(PyObject *obj) { short s; int i; long l; long long ll; unsigned short us; unsigned int ui; unsigned long ul; unsigned long long ull; if(!PyArray_IsIntegerScalar(obj)) { PyErr_SetString(PyExc_RuntimeError, "Received non-Integer scalar type for conversion to long!\n"); return 0; } if(PyArray_IsScalar(obj, Short)) { s = ((PyShortScalarObject *)obj)->obval; l = s; } else if(PyArray_IsScalar(obj, Int)) { i = ((PyLongScalarObject *)obj)->obval; l = i; } else if(PyArray_IsScalar(obj, Long)) { l = ((PyLongScalarObject *)obj)->obval; } else if(PyArray_IsScalar(obj, LongLong)) { ll = ((PyLongScalarObject *)obj)->obval; l = ll; } else if(PyArray_IsScalar(obj, UShort)) { us = ((PyLongScalarObject *)obj)->obval; l = us; } else if(PyArray_IsScalar(obj, UInt)) { ui = ((PyLongScalarObject *)obj)->obval; l = ui; } else if(PyArray_IsScalar(obj, ULong)) { ul = ((PyLongScalarObject *)obj)->obval; l = ul; } else if(PyArray_IsScalar(obj, ULongLong)) { ull = ((PyLongScalarObject *)obj)->obval; l = ull; } else { PyErr_SetString(PyExc_RuntimeError, "Received unknown scalar type for conversion to long!\n"); return 0; } return l; } //Raises an exception on error, which should be checked float getNumpyF(PyArrayObject *obj, Py_ssize_t i) { int dtype; char *p; float o = 0.0; npy_intp stride; //Get the dtype dtype = PyArray_TYPE(obj); //Get the stride stride = PyArray_STRIDE(obj, 0); p = PyArray_BYTES(obj) + i*stride; switch(dtype) { case NPY_FLOAT16: return npy_half_to_float(((npy_half*)p)[0]); case NPY_FLOAT32: return ((float*)p)[0]; case NPY_FLOAT64: 
if(((double*)p)[0] > FLT_MAX) { PyErr_SetString(PyExc_RuntimeError, "Received a floating point value greater than possible for a 32-bit float!\n"); goto error; } if(((double*)p)[0] < -FLT_MAX) { PyErr_SetString(PyExc_RuntimeError, "Received a floating point value less than possible for a 32-bit float!\n"); goto error; } o += ((double*)p)[0]; return o; default: PyErr_SetString(PyExc_RuntimeError, "Received unknown data type for conversion to float!\n"); goto error; break; } return o; error: return 0; } //The calling function needs to free the result char *getNumpyStr(PyArrayObject *obj, Py_ssize_t i) { char *p , *o = NULL; npy_intp stride, j; int dtype; //Get the dtype dtype = PyArray_TYPE(obj); //Get the stride stride = PyArray_STRIDE(obj, 0); p = PyArray_BYTES(obj) + i*stride; switch(dtype) { case NPY_STRING: o = calloc(1, stride + 1); strncpy(o, p, stride); return o; case NPY_UNICODE: o = calloc(1, stride/4 + 1); for(j=0; jhdr->indexOffset != 0) return 1; // No index, no entries pyBigWig issue #111 //if(bw->hdr->nBasesCovered > 0) return 1; // Sometimes headers are broken return 0; } PyObject* pyBwEnter(pyBigWigFile_t*self, PyObject *args) { bigWigFile_t *bw = self->bw; if(!bw) { PyErr_SetString(PyExc_RuntimeError, "The bigWig file handle is not opened!"); return NULL; } Py_INCREF(self); return (PyObject*) self; } PyObject* pyBwOpen(PyObject *self, PyObject *pyFname) { char *fname = NULL; char *mode = "r"; pyBigWigFile_t *pybw; bigWigFile_t *bw = NULL; if(!PyArg_ParseTuple(pyFname, "s|s", &fname, &mode)) goto error; //Open the local/remote file if(strchr(mode, 'w') != NULL || bwIsBigWig(fname, NULL)) { bw = bwOpen(fname, NULL, mode); } else { bw = bbOpen(fname, NULL); } if(!bw) { fprintf(stderr, "[pyBwOpen] bw is NULL!\n"); goto error; } if(!mode || !strchr(mode, 'w')) { if(!bw->cl) goto error; } pybw = PyObject_New(pyBigWigFile_t, &bigWigFile); if(!pybw) { fprintf(stderr, "[pyBwOpen] PyObject_New() returned NULL (out of memory?)!\n"); goto error; } pybw->bw = 
bw; pybw->lastTid = -1; pybw->lastType = -1; pybw->lastSpan = (uint32_t) -1; pybw->lastStep = (uint32_t) -1; pybw->lastStart = (uint32_t) -1; return (PyObject*) pybw; error: if(bw) bwClose(bw); PyErr_SetString(PyExc_RuntimeError, "Received an error during file opening!"); return NULL; } static void pyBwDealloc(pyBigWigFile_t *self) { if(self->bw) bwClose(self->bw); PyObject_DEL(self); } static PyObject *pyBwClose(pyBigWigFile_t *self, PyObject *args) { bwClose(self->bw); self->bw = NULL; Py_INCREF(Py_None); return Py_None; } //Accessor for the header (version, nLevels, nBasesCovered, minVal, maxVal, sumData, sumSquared static PyObject *pyBwGetHeader(pyBigWigFile_t *self, PyObject *args) { bigWigFile_t *bw = self->bw; PyObject *ret, *val; if(!bw) { PyErr_SetString(PyExc_RuntimeError, "The bigWig file handle is not opened!"); return NULL; } if(bw->isWrite == 1) { PyErr_SetString(PyExc_RuntimeError, "The header cannot be accessed in files opened for writing!"); return NULL; } ret = PyDict_New(); val = PyLong_FromUnsignedLong(bw->hdr->version); if(PyDict_SetItemString(ret, "version", val) == -1) goto error; Py_DECREF(val); val = PyLong_FromUnsignedLong(bw->hdr->nLevels); if(PyDict_SetItemString(ret, "nLevels", val) == -1) goto error; Py_DECREF(val); val = PyLong_FromUnsignedLongLong(bw->hdr->nBasesCovered); if(PyDict_SetItemString(ret, "nBasesCovered", val) == -1) goto error; Py_DECREF(val); val = PyLong_FromDouble(bw->hdr->minVal); if(PyDict_SetItemString(ret, "minVal", val) == -1) goto error; Py_DECREF(val); val = PyLong_FromDouble(bw->hdr->maxVal); if(PyDict_SetItemString(ret, "maxVal", val) == -1) goto error; Py_DECREF(val); val = PyLong_FromDouble(bw->hdr->sumData); if(PyDict_SetItemString(ret, "sumData", val) == -1) goto error; Py_DECREF(val); val = PyLong_FromDouble(bw->hdr->sumSquared); if(PyDict_SetItemString(ret, "sumSquared", val) == -1) goto error; Py_DECREF(val); return ret; error : Py_XDECREF(val); Py_XDECREF(ret); PyErr_SetString(PyExc_RuntimeError, 
"Received an error while getting the bigWig header!"); return NULL; } //Accessor for the chroms, args is optional static PyObject *pyBwGetChroms(pyBigWigFile_t *self, PyObject *args) { PyObject *ret = NULL, *val; bigWigFile_t *bw = self->bw; char *chrom = NULL; uint32_t i; if(!bw) { PyErr_SetString(PyExc_RuntimeError, "The bigWig file handle is not opened!"); return NULL; } if(bw->isWrite == 1) { PyErr_SetString(PyExc_RuntimeError, "Chromosomes cannot be accessed in files opened for writing!"); return NULL; } if(!(PyArg_ParseTuple(args, "|s", &chrom)) || !chrom) { ret = PyDict_New(); for(i=0; icl->nKeys; i++) { val = PyLong_FromUnsignedLong(bw->cl->len[i]); if(PyDict_SetItemString(ret, bw->cl->chrom[i], val) == -1) goto error; Py_DECREF(val); } } else { for(i=0; icl->nKeys; i++) { if(strcmp(bw->cl->chrom[i],chrom) == 0) { ret = PyLong_FromUnsignedLong(bw->cl->len[i]); break; } } } if(!ret) { Py_INCREF(Py_None); ret = Py_None; } return ret; error : Py_XDECREF(val); Py_XDECREF(ret); PyErr_SetString(PyExc_RuntimeError, "Received an error while adding an item to the output dictionary!"); return NULL; } enum bwStatsType char2enum(char *s) { if(strcmp(s, "mean") == 0) return mean; if(strcmp(s, "std") == 0) return stdev; if(strcmp(s, "dev") == 0) return dev; if(strcmp(s, "max") == 0) return max; if(strcmp(s, "min") == 0) return min; if(strcmp(s, "cov") == 0) return cov; if(strcmp(s, "coverage") == 0) return cov; if(strcmp(s, "sum") == 0) return sum; return -1; }; //Fetch summary statistics, default is the mean of the entire chromosome. 
static PyObject *pyBwGetStats(pyBigWigFile_t *self, PyObject *args, PyObject *kwds) { bigWigFile_t *bw = self->bw; double *val; uint32_t start, end = -1, tid; unsigned long startl = 0, endl = -1; static char *kwd_list[] = {"chrom", "start", "end", "type", "nBins", "exact", "numpy", NULL}; char *chrom, *type = "mean"; PyObject *ret, *exact = Py_False, *starto = NULL, *endo = NULL; PyObject *outputNumpy = Py_False; int i, nBins = 1; errno = 0; //In the off-chance that something elsewhere got an error and didn't clear it... if(!bw) { PyErr_SetString(PyExc_RuntimeError, "The bigWig file handle is not open!"); return NULL; } if(bw->isWrite == 1) { PyErr_SetString(PyExc_RuntimeError, "Statistics cannot be accessed in files opened for writing!"); return NULL; } if(bw->type == 1) { PyErr_SetString(PyExc_RuntimeError, "bigBed files have no statistics!"); return NULL; } if(!PyArg_ParseTupleAndKeywords(args, kwds, "s|OOsiOO", kwd_list, &chrom, &starto, &endo, &type, &nBins, &exact, &outputNumpy)) { PyErr_SetString(PyExc_RuntimeError, "You must supply at least a chromosome!"); return NULL; } //Check inputs, reset to defaults if nothing was input if(!nBins) nBins = 1; //For some reason, not specifying this overrides the default! 
if(!type) type = "mean"; tid = bwGetTid(bw, chrom); if(starto) { #ifdef WITHNUMPY if(PyArray_IsScalar(starto, Integer)) { startl = (long) getNumpyL(starto); } else #endif if(PyLong_Check(starto)) { startl = PyLong_AsLong(starto); #if PY_MAJOR_VERSION < 3 } else if(PyInt_Check(starto)) { startl = PyInt_AsLong(starto); #endif } else { PyErr_SetString(PyExc_RuntimeError, "The start coordinate must be a number!"); return NULL; } } if(endo) { #ifdef WITHNUMPY if(PyArray_IsScalar(endo, Integer)) { endl = (long) getNumpyL(endo); } else #endif if(PyLong_Check(endo)) { endl = PyLong_AsLong(endo); #if PY_MAJOR_VERSION < 3 } else if(PyInt_Check(endo)) { endl = PyInt_AsLong(endo); #endif } else { PyErr_SetString(PyExc_RuntimeError, "The end coordinate must be a number!"); return NULL; } } if(endl == (unsigned long) -1 && tid != (uint32_t) -1) endl = bw->cl->len[tid]; if(tid == (uint32_t) -1 || startl > end || endl > end) { PyErr_SetString(PyExc_RuntimeError, "Invalid interval bounds!"); return NULL; } start = (uint32_t) startl; end = (uint32_t) endl; if(end <= start || end > bw->cl->len[tid] || start >= end) { PyErr_SetString(PyExc_RuntimeError, "Invalid interval bounds!"); return NULL; } if(char2enum(type) == doesNotExist) { PyErr_SetString(PyExc_RuntimeError, "Invalid type!"); return NULL; } //Return a list of None if there are no entries at all if(!hasEntries(bw)) { #ifdef WITHNUMPY if(outputNumpy == Py_True) { val = malloc(sizeof(double)*nBins); for(i=0; ibw; int i; uint32_t start, end = -1, tid; unsigned long startl, endl; char *chrom; PyObject *ret, *starto = NULL, *endo = NULL; bwOverlappingIntervals_t *o; if(!bw) { PyErr_SetString(PyExc_RuntimeError, "The bigWig file handle is not open!"); return NULL; } if(bw->type == 1) { PyErr_SetString(PyExc_RuntimeError, "bigBed files have no values! 
Use 'entries' instead."); return NULL; } #ifdef WITHNUMPY static char *kwd_list[] = {"chrom", "start", "end", "numpy", NULL}; PyObject *outputNumpy = Py_False; if(!PyArg_ParseTupleAndKeywords(args, kwds, "sOO|O", kwd_list, &chrom, &starto, &endo, &outputNumpy)) { #else if(!PyArg_ParseTuple(args, "sOO", &chrom, &starto, &endo)) { #endif PyErr_SetString(PyExc_RuntimeError, "You must supply a chromosome, start and end position.\n"); return NULL; } tid = bwGetTid(bw, chrom); #ifdef WITHNUMPY if(PyArray_IsScalar(starto, Integer)) { startl = (long) getNumpyL(starto); } else #endif if(PyLong_Check(starto)) { startl = PyLong_AsLong(starto); #if PY_MAJOR_VERSION < 3 } else if(PyInt_Check(starto)) { startl = PyInt_AsLong(starto); #endif } else { PyErr_SetString(PyExc_RuntimeError, "The start coordinate must be a number!"); return NULL; } #ifdef WITHNUMPY if(PyArray_IsScalar(endo, Integer)) { endl = (long) getNumpyL(endo); } else #endif if(PyLong_Check(endo)) { endl = PyLong_AsLong(endo); #if PY_MAJOR_VERSION < 3 } else if(PyInt_Check(endo)) { endl = PyInt_AsLong(endo); #endif } else { PyErr_SetString(PyExc_RuntimeError, "The end coordinate must be a number!"); return NULL; } if(endl == (unsigned long) -1 && tid != (uint32_t) -1) endl = bw->cl->len[tid]; if(tid == (uint32_t) -1 || startl > end || endl > end) { PyErr_SetString(PyExc_RuntimeError, "Invalid interval bounds!"); return NULL; } start = (uint32_t) startl; end = (uint32_t) endl; if(end <= start || end > bw->cl->len[tid] || start >= end) { PyErr_SetString(PyExc_RuntimeError, "Invalid interval bounds!"); return NULL; } if(!hasEntries(self->bw)) { #ifdef WITHNUMPY if(outputNumpy == Py_True) { return PyArray_SimpleNew(0, NULL, NPY_FLOAT); } else { #endif return PyList_New(0); #ifdef WITHNUMPY } #endif } o = bwGetValues(self->bw, chrom, start, end, 1); if(!o) { PyErr_SetString(PyExc_RuntimeError, "An error occurred while fetching values!"); return NULL; } #ifdef WITHNUMPY if(outputNumpy == Py_True) { npy_intp len = end - 
start; ret = PyArray_SimpleNewFromData(1, &len, NPY_FLOAT, (void *) o->value); //This will break if numpy ever stops using malloc! PyArray_ENABLEFLAGS((PyArrayObject*) ret, NPY_ARRAY_OWNDATA); free(o->start); free(o->end); free(o); } else { #endif ret = PyList_New(end-start); for(i=0; i<(int) o->l; i++) PyList_SetItem(ret, i, PyFloat_FromDouble(o->value[i])); bwDestroyOverlappingIntervals(o); #ifdef WITHNUMPY } #endif return ret; } static PyObject *pyBwGetIntervals(pyBigWigFile_t *self, PyObject *args, PyObject *kwds) { bigWigFile_t *bw = self->bw; uint32_t start, end = -1, tid, i; unsigned long startl = 0, endl = -1; static char *kwd_list[] = {"chrom", "start", "end", NULL}; bwOverlappingIntervals_t *intervals = NULL; char *chrom; PyObject *ret, *starto = NULL, *endo = NULL; if(!bw) { PyErr_SetString(PyExc_RuntimeError, "The bigWig file handle is not opened!"); return NULL; } if(bw->isWrite == 1) { PyErr_SetString(PyExc_RuntimeError, "Intervals cannot be accessed in files opened for writing!"); return NULL; } if(bw->type == 1) { PyErr_SetString(PyExc_RuntimeError, "bigBed files have no intervals! 
Use 'entries()' instead."); return NULL; } if(!PyArg_ParseTupleAndKeywords(args, kwds, "s|OO", kwd_list, &chrom, &starto, &endo)) { PyErr_SetString(PyExc_RuntimeError, "You must supply at least a chromosome.\n"); return NULL; } //Sanity check tid = bwGetTid(bw, chrom); if(endl == (unsigned long) -1 && tid != (uint32_t) -1) endl = bw->cl->len[tid]; if(tid == (uint32_t) -1 || startl > end || endl > end) { PyErr_SetString(PyExc_RuntimeError, "Invalid interval bounds!"); return NULL; } if(starto) { #ifdef WITHNUMPY if(PyArray_IsScalar(starto, Integer)) { startl = (long) getNumpyL(starto); } else #endif if(PyLong_Check(starto)) { startl = PyLong_AsLong(starto); #if PY_MAJOR_VERSION < 3 } else if(PyInt_Check(starto)) { startl = PyInt_AsLong(starto); #endif } else { PyErr_SetString(PyExc_RuntimeError, "The start coordinate must be a number!"); return NULL; } } if(endo) { #ifdef WITHNUMPY if(PyArray_IsScalar(endo, Integer)) { endl = (long) getNumpyL(endo); } else #endif if(PyLong_Check(endo)) { endl = PyLong_AsLong(endo); #if PY_MAJOR_VERSION < 3 } else if(PyInt_Check(endo)) { endl = PyInt_AsLong(endo); #endif } else { PyErr_SetString(PyExc_RuntimeError, "The end coordinate must be a number!"); return NULL; } } start = (uint32_t) startl; end = (uint32_t) endl; if(end <= start || end > bw->cl->len[tid] || start >= end) { PyErr_SetString(PyExc_RuntimeError, "Invalid interval bounds!"); return NULL; } //Check for empty files if(!hasEntries(bw)) { Py_INCREF(Py_None); return Py_None; } //Get the intervals intervals = bwGetOverlappingIntervals(bw, chrom, start, end); if(!intervals) { PyErr_SetString(PyExc_RuntimeError, "An error occurred while fetching the overlapping intervals!"); return NULL; } if(!intervals->l) { Py_INCREF(Py_None); return Py_None; } ret = PyTuple_New(intervals->l); for(i=0; il; i++) { if(PyTuple_SetItem(ret, i, Py_BuildValue("(iif)", intervals->start[i], intervals->end[i], intervals->value[i]))) { Py_DECREF(ret); bwDestroyOverlappingIntervals(intervals); 
PyErr_SetString(PyExc_RuntimeError, "An error occurred while constructing the output tuple!"); return NULL; } } bwDestroyOverlappingIntervals(intervals); return ret; } #if PY_MAJOR_VERSION >= 3 //Return 1 iff obj is a ready unicode type int PyString_Check(PyObject *obj) { if(PyUnicode_Check(obj)) { return PyUnicode_READY(obj)+1; } return 0; } //I don't know what happens if PyBytes_AsString(NULL) is used... char *PyString_AsString(PyObject *obj) { return PyUnicode_AsUTF8(obj); } #endif //Will return 1 for long or int types currently int isNumeric(PyObject *obj) { #ifdef WITHNUMPY if(PyArray_IsScalar(obj, Integer)) return 1; #endif #if PY_MAJOR_VERSION < 3 if(PyInt_Check(obj)) return 1; #endif return PyLong_Check(obj); } //On error, throws a runtime error, so use PyErr_Occurred() after this uint32_t Numeric2Uint(PyObject *obj) { long l; #if PY_MAJOR_VERSION < 3 if(PyInt_Check(obj)) { return (uint32_t) PyInt_AsLong(obj); } #endif l = PyLong_AsLong(obj); //Check bounds if(l > 0xFFFFFFFF) { PyErr_SetString(PyExc_RuntimeError, "Length out of bounds for a bigWig file!"); return (uint32_t) -1; } return (uint32_t) l; } //This runs bwCreateHdr, bwCreateChromList, and bwWriteHdr PyObject *pyBwAddHeader(pyBigWigFile_t *self, PyObject *args, PyObject *kwds) { bigWigFile_t *bw = self->bw; char **chroms = NULL; int64_t n; uint32_t *lengths = NULL, len; int32_t maxZooms = 10; long zoomTmp = 10; static char *kwd_list[] = {"cl", "maxZooms", NULL}; PyObject *InputTuple = NULL, *tmpObject, *tmpObject2; Py_ssize_t i, pyLen; if(!bw) { PyErr_SetString(PyExc_RuntimeError, "The bigWig file handle is not open!"); return NULL; } if(!PyArg_ParseTupleAndKeywords(args, kwds, "O|k", kwd_list, &InputTuple, &zoomTmp)) { PyErr_SetString(PyExc_RuntimeError, "Illegal arguments"); return NULL; } maxZooms = zoomTmp; //Ensure that we received a list if(!PyList_Check(InputTuple)) { PyErr_SetString(PyExc_RuntimeError, "You MUST input a list of tuples (e.g., [('chr1', 1000), ('chr2', 2000)]!"); goto error; 
} pyLen = PyList_Size(InputTuple); if(pyLen < 1) { PyErr_SetString(PyExc_RuntimeError, "You input an empty list!"); goto error; } n = pyLen; lengths = calloc(n, sizeof(uint32_t)); chroms = calloc(n, sizeof(char*)); if(!lengths || !chroms) { PyErr_SetString(PyExc_RuntimeError, "Couldn't allocate lengths or chroms!"); goto error; } //Convert the tuple into something more useful in C for(i=0; i 0xFFFFFFFF) { PyErr_SetString(PyExc_RuntimeError, "A requested length is longer than what can be stored in a bigWig file!"); goto error; } lengths[i] = len; } //Create the header if(bwCreateHdr(bw, maxZooms)) { PyErr_SetString(PyExc_RuntimeError, "Received an error in bwCreateHdr"); goto error; } //Create the chromosome list bw->cl = bwCreateChromList(chroms, lengths, n); if(!bw->cl) { PyErr_SetString(PyExc_RuntimeError, "Received an error in bwCreateChromList"); goto error; } //Write the header if(bwWriteHdr(bw)) { PyErr_SetString(PyExc_RuntimeError, "Received an error while writing the bigWig header"); goto error; } if(lengths) free(lengths); if(chroms) free(chroms); Py_INCREF(Py_None); return Py_None; error: if(lengths) free(lengths); if(chroms) free(chroms); return NULL; } //1 on true, 0 on false int isType0(PyObject *chroms, PyObject *starts, PyObject *ends, PyObject *values) { int rv = 0; Py_ssize_t i, sz = 0; PyObject *tmp; if(!PyList_Check(chroms) #ifdef WITHNUMPY && !PyArray_Check(chroms) #endif ) return rv; if(!PyList_Check(starts) #ifdef WITHNUMPY && !PyArray_Check(starts) #endif ) return rv; if(!PyList_Check(ends) #ifdef WITHNUMPY && !PyArray_Check(ends) #endif ) return rv; if(!PyList_Check(values) #ifdef WITHNUMPY && !PyArray_Check(values) #endif ) return rv; if(PyList_Check(chroms)) sz = PyList_Size(chroms); #ifdef WITHNUMPY if(PyArray_Check(chroms)) sz += PyArray_Size(chroms); #endif if(PyList_Check(starts)) { if(sz != PyList_Size(starts)) return rv; #ifdef WITHNUMPY } else { if(sz != PyArray_Size(starts)) return rv; #endif } if(PyList_Check(ends)) { if(sz != 
PyList_Size(ends)) return rv; #ifdef WITHNUMPY } else { if(sz != PyArray_Size(ends)) return rv; #endif } if(PyList_Check(values)) { if(sz != PyList_Size(values)) return rv; #ifdef WITHNUMPY } else { if(sz != PyArray_Size(values)) return rv; #endif } //Ensure chroms contains strings, etc. if(PyList_Check(chroms)) { for(i=0; ibw; Py_ssize_t i, sz = 0; uint32_t tid, uspan, ustep, ustart; PyObject *tmp; #ifdef WITHNUMPY char *chrom; #endif if(self->lastType == -1) return 0; if(self->lastTid == -1) return 0; if(self->lastType != desiredType) return 0; //We can only append if (A) we have the same type or (B) the same chromosome (and compatible span/step/starts if(desiredType == 0) { //We need (A) chrom == lastTid and (B) all chroms to be the same if(PyList_Check(chroms)) sz = PyList_Size(chroms); #ifdef WITHNUMPY if(PyArray_Check(chroms)) sz = PyArray_Size(chroms); #endif for(i=0; ilastTid) return 0; } #ifdef WITHNUMPY if(PyArray_Check(starts)) { ustart = getNumpyU32((PyArrayObject*)starts, 0); } else { #endif ustart = Numeric2Uint(PyList_GetItem(starts, 0)); #ifdef WITHNUMPY } #endif if(PyErr_Occurred()) return 0; if(ustart < self->lastStart) return 0; return 1; } else if(desiredType == 1) { //We need (A) chrom == lastTid, (B) all chroms to be the same, and (C) equal spans uspan = Numeric2Uint(span); if(PyErr_Occurred()) return 0; if(uspan != self->lastSpan) return 0; if(!PyString_Check(chroms)) return 0; tid = bwGetTid(bw, PyString_AsString(chroms)); if(tid != (uint32_t) self->lastTid) return 0; #ifdef WITHNUMPY if(PyList_Check(starts)) ustart = Numeric2Uint(PyList_GetItem(starts, 0)); else ustart = getNumpyU32((PyArrayObject*) starts, 0); #else ustart = Numeric2Uint(PyList_GetItem(starts, 0)); #endif if(PyErr_Occurred()) return 0; if(ustart < self->lastStart) return 0; return 1; } else if(desiredType == 2) { //We need (A) chrom == lastTid, (B) span/step to be equal and (C) compatible starts tid = bwGetTid(bw, PyString_AsString(chroms)); if(tid != (uint32_t) 
self->lastTid) return 0; uspan = Numeric2Uint(span); if(PyErr_Occurred()) return 0; if(uspan != self->lastSpan) return 0; ustep = Numeric2Uint(step); if(PyErr_Occurred()) return 0; if(ustep != self->lastStep) return 0; //But is the start position compatible? ustart = Numeric2Uint(starts); if(PyErr_Occurred()) return 0; if(ustart != self->lastStart) return 0; return 1; } return 0; } //Returns 0 on success, 1 on error. Sets self->lastTid && self->lastStart (unless there was an error) int PyAddIntervals(pyBigWigFile_t *self, PyObject *chroms, PyObject *starts, PyObject *ends, PyObject *values) { bigWigFile_t *bw = self->bw; Py_ssize_t i, sz = 0; char **cchroms = NULL; uint32_t n, *ustarts = NULL, *uends = NULL; float *fvalues = NULL; int rv; if(PyList_Check(starts)) sz = PyList_Size(starts); #ifdef WITHNUMPY if(PyArray_Check(starts)) sz += PyArray_Size(starts); #endif n = (uint32_t) sz; //Allocate space cchroms = calloc(n, sizeof(char*)); ustarts = calloc(n, sizeof(uint32_t)); uends = calloc(n, sizeof(uint32_t)); fvalues = calloc(n, sizeof(float)); if(!cchroms || !ustarts || !uends || !fvalues) goto error; for(i=0; ilastTid = bwGetTid(bw, cchroms[n-1]); self->lastStart = uends[n-1]; } if(!PyList_Check(chroms)) { for(i=0; ilastStart int PyAppendIntervals(pyBigWigFile_t *self, PyObject *starts, PyObject *ends, PyObject *values) { bigWigFile_t *bw = self->bw; Py_ssize_t i, sz = 0; uint32_t n, *ustarts = NULL, *uends = NULL; float *fvalues = NULL; int rv; if(PyList_Check(starts)) sz = PyList_Size(starts); #ifdef WITHNUMPY if(PyArray_Check(starts)) sz += PyArray_Size(starts); #endif n = (uint32_t) sz; //Allocate space ustarts = calloc(n, sizeof(uint32_t)); uends = calloc(n, sizeof(uint32_t)); fvalues = calloc(n, sizeof(float)); if(!ustarts || !uends || !fvalues) goto error; for(i=0; ilastStart = uends[n-1]; free(ustarts); free(uends); free(fvalues); return rv; error: if(ustarts) free(ustarts); if(uends) free(uends); if(fvalues) free(fvalues); return 1; } //Returns 0 on 
success, 1 on error. Sets self->lastTid/lastStart/lastSpan (unless there was an error) int PyAddIntervalSpans(pyBigWigFile_t *self, PyObject *chroms, PyObject *starts, PyObject *values, PyObject *span) { bigWigFile_t *bw = self->bw; Py_ssize_t i, sz = 0; char *cchroms = NULL; uint32_t n, *ustarts = NULL, uspan; float *fvalues = NULL; int rv; if(PyList_Check(starts)) sz = PyList_Size(starts); #ifdef WITHNUMPY else if(PyArray_Check(starts)) sz += PyArray_Size(starts); #endif n = (uint32_t) sz; //Allocate space ustarts = calloc(n, sizeof(uint32_t)); fvalues = calloc(n, sizeof(float)); if(!ustarts || !fvalues) goto error; uspan = (uint32_t) PyLong_AsLong(span); cchroms = PyString_AsString(chroms); if(PyList_Check(starts)) { for(i=0; ilastTid = bwGetTid(bw, cchroms); self->lastSpan = uspan; self->lastStart = ustarts[n-1]+uspan; } free(ustarts); free(fvalues); return rv; error: if(ustarts) free(ustarts); if(fvalues) free(fvalues); return 1; } //Returns 0 on success, 1 on error. Updates self->lastStart int PyAppendIntervalSpans(pyBigWigFile_t *self, PyObject *starts, PyObject *values) { bigWigFile_t *bw = self->bw; Py_ssize_t i, sz = 0; uint32_t n, *ustarts = NULL; float *fvalues = NULL; int rv; if(PyList_Check(starts)) sz = PyList_Size(starts); #ifdef WITHNUMPY else if(PyArray_Check(starts)) sz += PyArray_Size(starts); #endif n = (uint32_t) sz; //Allocate space ustarts = calloc(n, sizeof(uint32_t)); fvalues = calloc(n, sizeof(float)); if(!ustarts || !fvalues) goto error; if(PyList_Check(starts)) { for(i=0; ilastStart = ustarts[n-1] + self->lastSpan; free(ustarts); free(fvalues); return rv; error: if(ustarts) free(ustarts); if(fvalues) free(fvalues); return 1; } //Returns 0 on success, 1 on error. 
Sets self->lastTid/self->lastSpan/lastStep/lastStart (unless there was an error) int PyAddIntervalSpanSteps(pyBigWigFile_t *self, PyObject *chroms, PyObject *starts, PyObject *values, PyObject *span, PyObject *step) { bigWigFile_t *bw = self->bw; Py_ssize_t i, sz = 0; char *cchrom = NULL; uint32_t n, ustarts, uspan, ustep; float *fvalues = NULL; int rv; if(PyList_Check(values)) sz = PyList_Size(values); #ifdef WITHNUMPY else if(PyArray_Check(values)) sz += PyArray_Size(values); #endif n = (uint32_t) sz; //Allocate space fvalues = calloc(n, sizeof(float)); if(!fvalues) goto error; uspan = (uint32_t) PyLong_AsLong(span); ustep = (uint32_t) PyLong_AsLong(step); ustarts = (uint32_t) PyLong_AsLong(starts); cchrom = PyString_AsString(chroms); if(PyList_Check(values)) { for(i=0; ilastTid = bwGetTid(bw, cchrom); self->lastSpan = uspan; self->lastStep = ustep; self->lastStart = ustarts + ustep*n; } free(fvalues); return rv; error: if(fvalues) free(fvalues); return 1; } //Returns 0 on success, 1 on error. Sets self->lastStart int PyAppendIntervalSpanSteps(pyBigWigFile_t *self, PyObject *values) { bigWigFile_t *bw = self->bw; Py_ssize_t i, sz = 0; uint32_t n; float *fvalues = NULL; int rv; if(PyList_Check(values)) sz = PyList_Size(values); #ifdef WITHNUMPY else if(PyArray_Check(values)) sz += PyArray_Size(values); #endif n = (uint32_t) sz; //Allocate space fvalues = calloc(n, sizeof(float)); if(!fvalues) goto error; if(PyList_Check(values)) { for(i=0; ilastStart += self->lastStep * n; free(fvalues); return rv; error: if(fvalues) free(fvalues); return 1; } //Checks and ensures that (A) the entries are sorted correctly and don't overlap and (B) that the come after things that have already been added. 
//Returns 1 on correct input, 0 on incorrect input int addEntriesInputOK(pyBigWigFile_t *self, PyObject *chroms, PyObject *starts, PyObject *ends, PyObject *span, PyObject *step, int type) { uint32_t lastTid = self->lastTid; uint32_t lastEnd = self->lastStart; uint32_t cTid, ustart, uend, uspan, ustep; Py_ssize_t i, sz = 0; PyObject *tmp; #ifdef WITHNUMPY char *tmpStr; #endif if(type == 0) { //Each chrom:start-end needs to be properly formed and come after prior entries if(PyList_Check(starts)) sz = PyList_Size(starts); #ifdef WITHNUMPY if(PyArray_Check(starts)) sz += PyArray_Size(starts); #endif if(sz == 0) return 0; for(i=0; ibw, tmpStr); free(tmpStr); } else { #endif tmp = PyList_GetItem(chroms, i); cTid = bwGetTid(self->bw, PyString_AsString(tmp)); #ifdef WITHNUMPY } #endif if(PyErr_Occurred()) return 0; if(cTid == (uint32_t) -1) return 0; #ifdef WITHNUMPY if(PyArray_Check(starts)) { ustart = getNumpyU32((PyArrayObject*)starts, i); } else { #endif ustart = Numeric2Uint(PyList_GetItem(starts, i)); #ifdef WITHNUMPY } #endif if(PyErr_Occurred()) return 0; #ifdef WITHNUMPY if(PyArray_Check(ends)) { uend = getNumpyU32((PyArrayObject*) ends, i); } else { #endif uend = Numeric2Uint(PyList_GetItem(ends, i)); #ifdef WITHNUMPY } #endif if(PyErr_Occurred()) return 0; if(ustart >= uend) return 0; if(lastTid != (uint32_t) -1) { if(lastTid > cTid) return 0; if(lastTid == cTid) { if(ustart < lastEnd) return 0; } } lastTid = cTid; lastEnd = uend; } return 1; } else if(type == 1) { //each chrom:start-(start+span) needs to be properly formed and come after prior entries if(!PyList_Check(starts) #ifdef WITHNUMPY && !PyArray_Check(starts) #endif ) return 0; if(PyList_Check(starts)) sz = PyList_Size(starts); #ifdef WITHNUMPY else if(PyArray_Check(starts)) sz += PyArray_Size(starts); #endif uspan = Numeric2Uint(span); if(PyErr_Occurred()) return 0; if(uspan < 1) return 0; if(sz == 0) return 0; cTid = bwGetTid(self->bw, PyString_AsString(chroms)); if(cTid == (uint32_t) -1) return 0; 
if(lastTid != (uint32_t) -1) { if(lastTid > cTid) return 0; } for(i=0; ibw, PyString_AsString(chroms)); if(cTid == (uint32_t) -1) return 0; ustart = Numeric2Uint(starts); if(PyErr_Occurred()) return 0; uspan = Numeric2Uint(span); if(PyErr_Occurred()) return 0; if(uspan < 1) return 0; ustep = Numeric2Uint(step); if(PyErr_Occurred()) return 0; if(ustep < 1) return 0; if(lastTid != (uint32_t) -1) { if(lastTid > cTid) return 0; if(lastTid == cTid) { if(ustart < lastEnd) return 0; } } return 1; } return 0; } PyObject *pyBwAddEntries(pyBigWigFile_t *self, PyObject *args, PyObject *kwds) { static char *kwd_list[] = {"chroms", "starts", "ends", "values", "span", "step", "validate", NULL}; PyObject *chroms = NULL, *starts = NULL, *ends = NULL, *values = NULL, *span = NULL, *step = NULL; PyObject *validate = Py_True; int desiredType; if(!self->bw) { PyErr_SetString(PyExc_RuntimeError, "The bigWig file handle is not open!"); return NULL; } if(!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OOOOO", kwd_list, &chroms, &starts, &ends, &values, &span, &step, &validate)) { PyErr_SetString(PyExc_RuntimeError, "Illegal arguments"); return NULL; } desiredType = getType(chroms, starts, ends, values, span, step); if(desiredType == -1) { PyErr_SetString(PyExc_RuntimeError, "You must provide a valid set of entries. These can be comprised of any of the following: \n" "1. A list of each of chromosomes, start positions, end positions and values.\n" "2. A list of each of start positions and values. Also, a chromosome and span must be specified.\n" "3. 
A list values, in which case a single chromosome, start position, span and step must be specified.\n"); return NULL; } if(validate == Py_True && !addEntriesInputOK(self, chroms, starts, ends, span, step, desiredType)) { PyErr_SetString(PyExc_RuntimeError, "The entries you tried to add are out of order, precede already added entries, or otherwise use illegal values.\n" " Please correct this and try again.\n"); return NULL; } if(canAppend(self, desiredType, chroms, starts, span, step)) { switch(desiredType) { case 0: if(PyAppendIntervals(self, starts, ends, values)) goto error; break; case 1: if(PyAppendIntervalSpans(self, starts, values)) goto error; break; case 2: if(PyAppendIntervalSpanSteps(self, values)) goto error; break; } } else { switch(desiredType) { case 0: if(PyAddIntervals(self, chroms, starts, ends, values)) goto error; break; case 1: if(PyAddIntervalSpans(self, chroms, starts, values, span)) goto error; break; case 2: if(PyAddIntervalSpanSteps(self, chroms, starts, values, span, step)) goto error; break; } } self->lastType = desiredType; Py_INCREF(Py_None); return Py_None; error: return NULL; } /************************************************************** * * BigBed functions, added in 0.3.0 * **************************************************************/ static PyObject *pyBBGetEntries(pyBigWigFile_t *self, PyObject *args, PyObject *kwds) { bigWigFile_t *bw = self->bw; uint32_t i; uint32_t start, end = -1, tid; unsigned long startl, endl; char *chrom; static char *kwd_list[] = {"chrom", "start", "end", "withString", NULL}; PyObject *ret, *t, *starto = NULL, *endo = NULL; PyObject *withStringPy = Py_True; int withString = 1; bbOverlappingEntries_t *o; if(!bw) { PyErr_SetString(PyExc_RuntimeError, "The bigBed file handle is not open!"); return NULL; } if(bw->type == 0) { PyErr_SetString(PyExc_RuntimeError, "bigWig files have no entries! 
Use 'intervals' or 'values' instead."); return NULL; } if(!PyArg_ParseTupleAndKeywords(args, kwds, "sOO|O", kwd_list, &chrom, &starto, &endo, &withStringPy)) { PyErr_SetString(PyExc_RuntimeError, "You must supply a chromosome, start and end position.\n"); return NULL; } tid = bwGetTid(bw, chrom); #ifdef WITHNUMPY if(PyArray_IsScalar(starto, Integer)) { startl = (long) getNumpyL(starto); } else #endif if(PyLong_Check(starto)) { startl = PyLong_AsLong(starto); #if PY_MAJOR_VERSION < 3 } else if(PyInt_Check(starto)) { startl = PyInt_AsLong(starto); #endif } else { PyErr_SetString(PyExc_RuntimeError, "The start coordinate must be a number!"); return NULL; } #ifdef WITHNUMPY if(PyArray_IsScalar(endo, Integer)) { endl = (long) getNumpyL(endo); } else #endif if(PyLong_Check(endo)) { endl = PyLong_AsLong(endo); #if PY_MAJOR_VERSION < 3 } else if(PyInt_Check(endo)) { endl = PyInt_AsLong(endo); #endif } else { PyErr_SetString(PyExc_RuntimeError, "The end coordinate must be a number!"); return NULL; } if(endl == (unsigned long) -1 && tid != (uint32_t) -1) endl = bw->cl->len[tid]; if(tid == (uint32_t) -1 || startl > end || endl > end) { PyErr_SetString(PyExc_RuntimeError, "Invalid interval bounds!"); return NULL; } start = (uint32_t) startl; end = (uint32_t) endl; if(end <= start || end > bw->cl->len[tid] || start >= end) { PyErr_SetString(PyExc_RuntimeError, "Invalid interval bounds!"); return NULL; } if(withStringPy == Py_False) withString = 0; o = bbGetOverlappingEntries(bw, chrom, start, end, withString); if(!o) { PyErr_SetString(PyExc_RuntimeError, "An error occurred while fetching the overlapping entries!\n"); return NULL; } if(!o->l) { Py_INCREF(Py_None); return Py_None; } ret = PyList_New(o->l); if(!ret) goto error; for(i=0; il; i++) { if(withString) { t = Py_BuildValue("(iis)", o->start[i], o->end[i], o->str[i]); } else { t = Py_BuildValue("(ii)", o->start[i], o->end[i]); } if(!t) goto error; PyList_SetItem(ret, i, t); } bbDestroyOverlappingEntries(o); return ret; 
error: Py_DECREF(ret); bbDestroyOverlappingEntries(o); PyErr_SetString(PyExc_RuntimeError, "An error occurred while constructing the output list and tuple!"); return NULL; } static PyObject *pyBBGetSQL(pyBigWigFile_t *self, PyObject *args) { bigWigFile_t *bw = self->bw; char *str = bbGetSQL(bw); size_t len = 0; PyObject *o = NULL; if(!bw) { PyErr_SetString(PyExc_RuntimeError, "The bigBed file handle is not open!"); return NULL; } if(!str) { Py_INCREF(Py_None); return Py_None; } len = strlen(str); #if PY_MAJOR_VERSION >= 3 o = PyBytes_FromStringAndSize(str, len); #else o = PyString_FromStringAndSize(str, len); #endif if(str) free(str); return o; } static PyObject *pyIsBigWig(pyBigWigFile_t *self, PyObject *args) { bigWigFile_t *bw = self->bw; if(bw->type == 0) { Py_INCREF(Py_True); return Py_True; } Py_INCREF(Py_False); return Py_False; } static PyObject *pyIsBigBed(pyBigWigFile_t *self, PyObject *args) { bigWigFile_t *bw = self->bw; if(!bw) { PyErr_SetString(PyExc_RuntimeError, "The bigBed file handle is not open!"); return NULL; } if(bw->type == 1) { Py_INCREF(Py_True); return Py_True; } Py_INCREF(Py_False); return Py_False; } /************************************************************** * * End of bigBed functions * **************************************************************/ #if PY_MAJOR_VERSION >= 3 PyMODINIT_FUNC PyInit_pyBigWig(void) { #else PyMODINIT_FUNC initpyBigWig(void) { #endif PyObject *res; errno = 0; //just in case #if PY_MAJOR_VERSION >= 3 if(Py_AtExit(bwCleanup)) return NULL; if(PyType_Ready(&bigWigFile) < 0) return NULL; if(bwInit(128000)) return NULL; res = PyModule_Create(&pyBigWigmodule); if(!res) return NULL; #else if(Py_AtExit(bwCleanup)) return; if(PyType_Ready(&bigWigFile) < 0) return; if(bwInit(128000)) return; res = Py_InitModule3("pyBigWig", bwMethods, "A module for handling bigWig files"); #endif Py_INCREF(&bigWigFile); PyModule_AddObject(res, "pyBigWig", (PyObject *) &bigWigFile); #ifdef WITHNUMPY //Add the numpy constant 
import_array(); //Needed for numpy stuff to work PyModule_AddIntConstant(res, "numpy", 1); #else PyModule_AddIntConstant(res, "numpy", 0); #endif #ifdef NOCURL PyModule_AddIntConstant(res, "remote", 0); #else PyModule_AddIntConstant(res, "remote", 1); #endif PyModule_AddStringConstant(res, "__version__", pyBigWigVersion); #if PY_MAJOR_VERSION >= 3 return res; #endif } pyBigWig-0.3.18/pyBigWig.h000066400000000000000000000440031400776027600152300ustar00rootroot00000000000000#include #include #include "bigWig.h" #define pyBigWigVersion "0.3.18" typedef struct { PyObject_HEAD bigWigFile_t *bw; int32_t lastTid; //The TID of the last written entry (or -1) uint32_t lastSpan; //The span of the last written entry (if applicable) uint32_t lastStep; //The step of the last written entry (if applicable) uint32_t lastStart; //The next start position (if applicable) int lastType; //The type of the last written entry } pyBigWigFile_t; static PyObject *pyBwOpen(PyObject *self, PyObject *pyFname); static PyObject *pyBwEnter(pyBigWigFile_t *self, PyObject *args); static PyObject *pyBwClose(pyBigWigFile_t *pybw, PyObject *args); static PyObject *pyBwGetChroms(pyBigWigFile_t *pybw, PyObject *args); static PyObject *pyIsBigWig(pyBigWigFile_t *pybw, PyObject *args); static PyObject *pyIsBigBed(pyBigWigFile_t *pybw, PyObject *args); static PyObject *pyBwGetStats(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds); #ifdef WITHNUMPY static PyObject *pyBwGetValues(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds); #else static PyObject *pyBwGetValues(pyBigWigFile_t *pybw, PyObject *args); #endif static PyObject *pyBwGetIntervals(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds); static PyObject *pyBBGetEntries(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds); static PyObject *pyBBGetSQL(pyBigWigFile_t *pybw, PyObject *args); static PyObject *pyBwGetHeader(pyBigWigFile_t *pybw, PyObject *args); static PyObject *pyBwAddHeader(pyBigWigFile_t *pybw, PyObject *args, PyObject 
*kwds); static PyObject *pyBwAddEntries(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds); static void pyBwDealloc(pyBigWigFile_t *pybw); //The function types aren't actually correct... static PyMethodDef bwMethods[] = { {"open", (PyCFunction)pyBwOpen, METH_VARARGS, "Open a bigWig or bigBed file. For remote files, give a URL starting with HTTP,\n\ FTP, or HTTPS.\n\ \n\ Optional arguments:\n\ mode: An optional mode. The default is 'r', which opens a file for reading.\n\ If you specify a mode containing 'w' then you'll instead open a file\n\ for writing. Note that you then need to add an appropriate header\n\ before use. For bigBed files, only reading is supported.\n\ \n\ Returns:\n\ A bigWigFile object on success, otherwise None.\n\ \n\ Arguments:\n\ file: The name of a bigWig file.\n\ \n\ >>> import pyBigWig\n\ >>> bw = pyBigWig.open(\"some_file.bw\")\n"}, {NULL, NULL, 0, NULL} }; static PyMethodDef bwObjMethods[] = { {"header", (PyCFunction)pyBwGetHeader, METH_VARARGS, "Returns the header of a bigWig file. 
This contains information such as: \n\ * The version number of the file ('version').\n\ * The number of zoom levels ('nLevels').\n\ * The number of bases covered ('nBasesCovered').\n\ * The minimum value ('minVal').\n\ * The maximum value ('maxVal').\n\ * The sum of all values ('sumData').\n\ * The sum of the square of all values ('sumSquared').\n\ These are returned as a dictionary.\n\ \n\ >>> import pyBigWig\n\ >>> bw = pyBigWig.open(\"some_file.bw\")\n\ >>> bw.header()\n\ {'maxVal': 2L, 'sumData': 272L, 'minVal': 0L, 'version': 4L,\n\ 'sumSquared': 500L, 'nLevels': 1L, 'nBasesCovered': 154L}\n\ >>> bw.close()\n"}, {"close", (PyCFunction)pyBwClose, METH_VARARGS, "Close a bigWig file.\n\ \n\ >>> import pyBigWig\n\ >>> bw = pyBigWig.open(\"some_file.bw\")\n\ >>> bw.close()\n"}, {"isBigWig", (PyCFunction)pyIsBigWig, METH_VARARGS, "Returns True if the object is a bigWig file (otherwise False).\n\ >>> import pyBigWig\n\ >>> bw = pyBigWig.open(\"some_file.bigWig\")\n\ >>> bw.isBigWig()\n\ True\n\ >>> bw.isBigBed()\n\ False\n"}, {"isBigBed", (PyCFunction)pyIsBigBed, METH_VARARGS, "Returns true if the object is a bigBed file (otherwise False).\n\ >>> import pyBigWig\n\ >>> bw = pyBigWig.open(\"some_file.bigBed\")\n\ >>> bw.isBigWig()\n\ False\n\ >>> bw.isBigBed()\n\ True\n"}, {"chroms", (PyCFunction)pyBwGetChroms, METH_VARARGS, "Return a chromosome: length dictionary. 
The order is typically not\n\ alphabetical and the lengths are long (thus the 'L' suffix).\n\ \n\ Optional arguments:\n\ chrom: An optional chromosome name\n\ \n\ Returns:\n\ A list of chromosome lengths or a dictionary of them.\n\ \n\ >>> import pyBigWig\n\ >>> bw = pyBigWig.open(\"test/test.bw\")\n\ >>> bw.chroms()\n\ {'1': 195471971L, '10': 130694993L}\n\ \n\ Note that you may optionally supply a specific chromosome:\n\ \n\ >>> bw.chroms(\"chr1\")\n\ 195471971L\n\ \n\ If you specify a non-existant chromosome then no output is produced:\n\ \n\ >>> bw.chroms(\"foo\")\n\ >>>\n"}, {"stats", (PyCFunction)pyBwGetStats, METH_VARARGS|METH_KEYWORDS, "Return summary statistics for a given range. On error, this function throws a\n\ runtime exception.\n\ \n\ Positional arguments:\n\ chr: Chromosome name\n\ \n\ Keyword arguments:\n\ start: Starting position\n\ end: Ending position\n\ type: Summary type (mean, min, max, coverage, std), default 'mean'.\n\ nBins: Number of bins into which the range should be divided before\n\ computing summary statistics. The default is 1.\n\ exact: By default, pyBigWig uses the same method as Kent's tools from UCSC\n\ for computing statistics. This means that 'zoom levels' may be\n\ used, rather than actual values (please see the pyBigWig repository\n\ on github for further information on this). To avoid this behaviour,\n\ simply specify 'exact=True'. Note that values returned will then\n\ differ from what UCSC, IGV, and similar other tools will report.\n\ \n\ >>> import pyBigWig\n\ >>> bw = pyBigWig.open(\"test/test.bw\")\n\ >>> bw.stats(\"1\", 0, 3)\n\ [0.2000000054637591]\n\ \n\ This is the mean value over the range 1:1-3 (in 1-based coordinates). If\n\ the start and end positions aren't given the entire chromosome is used.\n\ There are additional optional parameters 'type' and 'nBins'. 'type'\n\ specifies the type of summary information to calculate, which is 'mean'\n\ by default. 
Other possibilites for 'type' are: 'min' (minimum value),\n\ 'max' (maximum value), 'coverage' (number of covered bases), and 'std'\n\ (standard deviation). 'nBins' defines how many bins the region will be\n\ divided into and defaults to 1.\n\ \n\ >>> bw.stats(\"1\", 0, 3, type=\"min\")\n\ [0.10000000149011612]\n\ >>> bw.stats(\"1\", 0, 3, type=\"max\")\n\ [0.30000001192092896]\n\ >>> bw.stats(\"1\", 0, 10, type=\"coverage\")\n\ [0.30000000000000004]\n\ >>> bw.stats(\"1\", 0, 3, type=\"std\")\n\ [0.10000000521540645]\n\ >>> bw.stats(\"1\",99,200, type=\"max\", nBins=2)\n\ [1.399999976158142, 1.5]\n"}, #ifdef WITHNUMPY {"values", (PyCFunction)pyBwGetValues, METH_VARARGS|METH_KEYWORDS, "Retrieve the value stored for each position (or None). On error, a runtime\n\ exception is thrown.\n\ \n\ Positional arguments:\n\ chr: Chromosome name\n\ start: Starting position\n\ end: Ending position\n\ \n\ Optional arguments:\n\ numpy: If True, return a numpy array rather than a list of values. This\n\ is generally more memory efficient. Note that this option is only\n\ available if pyBigWig was installed with numpy support (check the\n\ pyBigWig.numpy() function).\n\ \n\ >>> import pyBigWig\n\ >>> bw = pyBigWig.open(\"test/test.bw\")\n\ >>> bw.values(\"1\", 0, 3)\n\ [0.10000000149011612, 0.20000000298023224, 0.30000001192092896]\n\ \n\ The length of the returned list will always match the length of the\n\ range. Any uncovered bases will have a value of None.\n\ \n\ >>> bw.values(\"1\", 0, 4)\n\ [0.10000000149011612, 0.20000000298023224, 0.30000001192092896, None]\n\ \n"}, #else {"values", (PyCFunction)pyBwGetValues, METH_VARARGS, "Retrieve the value stored for each position (or None). 
On error, a runtime\n\ exception is thrown.\n\ \n\ Positional arguments:\n\ chr: Chromosome name\n\ start: Starting position\n\ end: Ending position\n\ \n\ >>> import pyBigWig\n\ >>> bw = pyBigWig.open(\"test/test.bw\")\n\ >>> bw.values(\"1\", 0, 3)\n\ [0.10000000149011612, 0.20000000298023224, 0.30000001192092896]\n\ \n\ The length of the returned list will always match the length of the\n\ range. Any uncovered bases will have a value of None.\n\ \n\ >>> bw.values(\"1\", 0, 4)\n\ [0.10000000149011612, 0.20000000298023224, 0.30000001192092896, None]\n\ \n"}, #endif {"intervals", (PyCFunction)pyBwGetIntervals, METH_VARARGS|METH_KEYWORDS, "Retrieve each interval covering a part of a chromosome/region. On error, a\n\ runtime exception is thrown.\n\ \n\ Positional arguments:\n\ chr: Chromosome name\n\ \n\ Keyword arguments:\n\ start: Starting position\n\ end: Ending position\n\ \n\ If start and end aren't specified, the entire chromosome is returned.\n\ The returned object is a tuple containing the starting position, end\n\ position, and value of each interval in the file. As with all bigWig\n\ positions, those returned are 0-based half-open (e.g., a start of 0 and\n\ end of 10 specifies the first 10 positions).\n\ \n\ >>> import pyBigWig\n\ >>> bw = pyBigWig.open(\"test/test.bw\")\n\ >>> bw.intervals(\"1\", 0, 3)\n\ ((0, 1, 0.10000000149011612), (1, 2, 0.20000000298023224),\n\ (2, 3, 0.30000001192092896))\n\ >>> bw.close()"}, {"entries", (PyCFunction) pyBBGetEntries, METH_VARARGS|METH_KEYWORDS, "Retrieves entries from a bigBed file. These can optionally contain the string\n\ associated with each entry.\n\ \n\ Positional arguments:\n\ chr: Chromosome name\n\ \n\ Keyword arguments:\n\ start: Starting position\n\ end: Ending position\n\ withString: If True, return the string associated with each entry.\n\ Default True.\n\ \n\ The output is a list of tuples, with members \"start\", \"end\", and \"string\"\n\ (assuming \"withString=True\"). 
If there are no overlapping entries, then None\n\ is returned.\n\ \n\ >>> import pyBigWig\n\ >>> bb = pyBigWig.open(\"https://www.encodeproject.org/files/ENCFF001JBR/@@download/ENCFF001JBR.bigBed\")\n\ >>> print(bw.entries('chr1',10000000,10020000))\n\ [(10009333, 10009640, '61035\t130\t-\t0.026\t0.42\t404'),\n\ (10014007, 10014289, '61047\t136\t-\t0.029\t0.42\t404'),\n\ (10014373, 10024307, '61048\t630\t-\t5.420\t0.00\t2672399')]\n\ >>> print(bb.entries(\"chr1\", 10000000, 10000500, withString=False))\n\ [(10009333, 10009640), (10014007, 10014289), (10014373, 10024307)]\n\ \n"}, {"SQL", (PyCFunction) pyBBGetSQL, METH_VARARGS, "Returns the SQL string associated with the file. This is typically useful for\n\ bigBed files, where this determines what is held in each column of the text\n\ string associated with entries.\n\ \n\ If there is no SQL string, then None is returned.\n\ \n\ >>> import pyBigWig\n\ >>> bb = pyBigWig.open(\"https://www.encodeproject.org/files/ENCFF001JBR/@@download/ENCFF001JBR.bigBed\")\n\ >>> print(bb.SQL())\n\ table RnaElements\n\ \"BED6 + 3 scores for RNA Elements data \"\n\ (\n\ string chrom; \"Reference sequence chromosome or scaffold\"\n\ uint chromStart; \"Start position in chromosome\"\n\ uint chromEnd; \"End position in chromosome\"\n\ string name; \"Name of item\"\n\ uint score; \"Normalized score from 0-1000\"\n\ char[1] strand; \"+ or - or . for unknown\"\n\ float level; \"Expression level such as RPKM or FPKM. Set to -1 for no data.\"\n\ float signif; \"Statistical significance such as IDR. Set to -1 for no data.\"\n\ uint score2; \"Additional measurement/count e.g. number of reads. Set to 0 for no data.\"\n\ )\n\ \n\ \n"}, {"addHeader", (PyCFunction)pyBwAddHeader, METH_VARARGS|METH_KEYWORDS, "Adds a header to a file opened for writing. This MUST be called before adding\n\ any entries. 
On error, a runtime exception is thrown.\n\ \n\ Positional arguments:\n\ cl: A chromosome list, of the form (('chr1', 1000), ('chr2', 2000), ...).\n\ In other words, each element of the list is a tuple containing a\n\ chromosome name and its associated length.\n\ \n\ Keyword arguments:\n\ maxZooms: The maximum number of zoom levels. The value must be >=0. The\n\ default is 10.\n\ \n\ >>> import pyBigWig\n\ >>> import tempfile\n\ >>> import os\n\ >>> ofile = tempfile.NamedTemporaryFile(delete=False)\n\ >>> oname = ofile.name\n\ >>> ofile.close()\n\ >>> bw = pyBigWig.open(oname, 'w')\n\ >>> bw.addHeader([(\"1\", 1000000), (\"2\", 1500000)], maxZooms=0)\n\ >>> bw.close()\n\ >>> os.remove(oname)"}, {"addEntries", (PyCFunction)pyBwAddEntries, METH_VARARGS|METH_KEYWORDS, "Adds one or more entries to a bigWig file. This returns nothing, but throws a\n\ runtime exception on error.\n\ \n\ This function always accepts an optional 'validate' option. If set to 'True',\n\ which is the default, the input entries are checked to ensure that they come\n\ after previously entered entries. This comes with significant overhead, so if\n\ this is instead 'False' then this validation is not performed.\n\ \n\ There are three manners in which entries can be stored in bigWig files.\n\ \n\ \n\ bedGraph-like entries (12 bytes each):\n\ \n\ Positional arguments:\n\ chrom: A list of chromosome. These MUST match those added with addHeader().\n\ starts: A list of start positions. These are 0-based.\n\ \n\ Keyword arguments:\n\ ends: A list of end positions. These are 0-based half open, so a start of\n\ 0 and end of 10 specifies the first 10 bases.\n\ values: A list of values.\n\ \n\ \n\ Variable-step entries (8 bytes each):\n\ \n\ Positional arguments:\n\ chrom: A chromosome name. This MUST match one added with addHeader().\n\ starts: A list of start positions. These are 0-based.\n\ \n\ Keyword arguments:\n\ values: A list of values.\n\ span: A span width. 
This is an integer value and specifies how many bases\n\ each entry describes. An entry with a start position of 0 and a span\n\ of 10 describes the first 10 bases.\n\ \n\ \n\ Fixed-step entries (4 bytes each):\n\ \n\ Positional arguments:\n\ chrom: A chromosome name. This MUST match one added with addHeader().\n\ starts: A start position. These are 0-based. The start position of each\n\ entry starts 'step' after the previous and describes 'span' bases.\n\ \n\ Keyword arguments:\n\ values: A list of values.\n\ span: A span width. This is an integer value and specifies how many bases\n\ each entry describes. An entry with a start position of 0 and a span\n\ of 10 describes the first 10 bases.\n\ step: A step width. Each subsequent entry begins this number of bases\n\ after the previous. So if the first entry has a start of 0 and step\n\ or 30, the second entry will start at 30.\n\ \n\ >>> import pyBigWig\n\ >>> import tempfile\n\ >>> import os\n\ >>> ofile = tempfile.NamedTemporaryFile(delete=False)\n\ >>> oname = ofile.name\n\ >>> ofile.close()\n\ >>> bw = pyBigWig.open(oname, 'w')\n\ >>> bw.addHeader([(\"1\", 1000000), (\"2\", 1500000)])\n\ >>> #Add some bedGraph-like entries\n\ >>> bw.addEntries([\"1\", \"1\", \"1\"], [0, 100, 125], ends=[5, 120, 126], values=[0.0, 1.0, 200.0])\n\ >>> #Variable-step entries, the span 500-520, 600-620, and 635-655\n\ >>> bw.addEntries(\"1\", [500, 600, 635], values=[-2.0, 150.0, 25.0], span=20)\n\ >>> #Fixed-step entries, the bases described are 900-920, 930-950, and 960-980\n\ >>> bw.addEntries(\"1\", 900, values=[-5.0, -20.0, 25.0], span=20, step=30)\n\ >>> #This only works due to using validate=False. 
Obviously the file is then corrupt.\n\ >>> bw.addEntries([\"1\", \"1\", \"1\"], [0, 100, 125], ends=[5, 120, 126], values=[0.0, 1.0, 200.0], validate=False)\n\ >>> bw.close()\n\ >>> os.remove(oname)"}, {"__enter__", (PyCFunction)pyBwEnter, METH_NOARGS, NULL}, {"__exit__", (PyCFunction)pyBwClose, METH_VARARGS, NULL}, {NULL, NULL, 0, NULL} }; #if PY_MAJOR_VERSION >= 3 struct pyBigWigmodule_state { PyObject *error; }; #define GETSTATE(m) ((struct pyBigWigmodule_state*)PyModule_GetState(m)) static PyModuleDef pyBigWigmodule = { PyModuleDef_HEAD_INIT, "pyBigWig", "A python module for bigWig file access", -1, bwMethods, NULL, NULL, NULL, NULL }; #endif //Should set tp_dealloc, tp_print, tp_repr, tp_str, tp_members static PyTypeObject bigWigFile = { #if PY_MAJOR_VERSION >= 3 PyVarObject_HEAD_INIT(NULL, 0) #else PyObject_HEAD_INIT(NULL) 0, /*ob_size*/ #endif "pyBigWig.bigWigFile", /*tp_name*/ sizeof(pyBigWigFile_t), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)pyBwDealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash*/ 0, /*tp_call*/ 0, /*tp_str*/ PyObject_GenericGetAttr, /*tp_getattro*/ PyObject_GenericSetAttr, /*tp_setattro*/ 0, /*tp_as_buffer*/ #if PY_MAJOR_VERSION >= 3 Py_TPFLAGS_DEFAULT, /*tp_flags*/ #else Py_TPFLAGS_HAVE_CLASS, /*tp_flags*/ #endif "bigWig File", /*tp_doc*/ 0, /*tp_traverse*/ 0, /*tp_clear*/ 0, /*tp_richcompare*/ 0, /*tp_weaklistoffset*/ 0, /*tp_iter*/ 0, /*tp_iternext*/ bwObjMethods, /*tp_methods*/ 0, /*tp_members*/ 0, /*tp_getset*/ 0, /*tp_base*/ 0, /*tp_dict*/ 0, /*tp_descr_get*/ 0, /*tp_descr_set*/ 0, /*tp_dictoffset*/ 0, /*tp_init*/ 0, /*tp_alloc*/ 0, /*tp_new*/ 0,0,0,0,0,0 }; 
pyBigWig-0.3.18/pyBigWigTest/000077500000000000000000000000001400776027600157165ustar00rootroot00000000000000pyBigWig-0.3.18/pyBigWigTest/__init__.py000066400000000000000000000000001400776027600200150ustar00rootroot00000000000000pyBigWig-0.3.18/pyBigWigTest/test.bigBed000066400000000000000000000650141400776027600200010ustar00rootroot00000000000000* 0@table RnaElements "BED6 + 3 scores for RNA Elements data " ( string chrom; "Reference sequence chromosome or scaffold" uint chromStart; "Start position in chromosome" uint chromEnd; "End position in chromosome" string name; "Name of item" uint score; "Normalized score from 0-1000" char[1] strand; "+ or - or . for unknown" float level; "Expression level such as RPKM or FPKM. Set to -1 for no data." float signif; "Statistical significance such as IDR. Set to -1 for no data." uint score2; "Additional measurement/count e.g. number of reads. Set to 0 for no data." ) ]?@@@@xchr1 chr110Cchr15>5+chr2wA chr4(F chr7 W xU=NC1 q0X1#$__)8@m*18 q._3 `!n(|J!S+$SNK5 #5 _I>>M xc```ks3&29 `@ha$]@r`&$a+̏ <3Ja  ;]nL˃Xf2\E/Hreuer>gʆl=9b0a`H A("r:,H\@$XΔs)g . 
{eώ!3rT0Oh$ >) >]xc```ko35{0Qqc 1Ea&9>i8xc```kL H80Mrc6ՍIP|b+Lsa&y`æ>803PDtfәYt0Hh)t#S6d\@(ʎCf$y T.h$>Q>CQpyBigWig-0.3.18/pyBigWigTest/test.bw000066400000000000000000000312461400776027600172350ustar00rootroot00000000000000&X0+f?@q@h=F@x1c 10Q?xc`  Ξ9c l; ijfδKӀx4u xcd``8: pHb)X.h$,7,xc` f@3 썍'8=~ (p#R J ( h$XfX/7&pyBigWig-0.3.18/pyBigWigTest/test.py000066400000000000000000000316251400776027600172560ustar00rootroot00000000000000import pyBigWig import tempfile import os import sys import hashlib import numpy as np class TestRemote(): fname = "http://raw.githubusercontent.com/dpryan79/pyBigWig/master/pyBigWigTest/test.bw" def doOpen(self): bw = pyBigWig.open(self.fname) assert(bw is not None) return bw def doOpenWith(self): with pyBigWig.open(self.fname) as bw: assert(bw.chroms() == {'1': 195471971, '10': 130694993}) def doChroms(self, bw): assert(bw.chroms() == {'1': 195471971, '10': 130694993}) assert(bw.chroms("1") == 195471971) assert(bw.chroms("c") is None) def doHeader(self, bw): assert(bw.header() == {'maxVal': 2, 'sumData': 272, 'minVal': 0, 'version': 4, 'sumSquared': 500, 'nLevels': 1, 'nBasesCovered': 154}) def doStats(self, bw): assert(bw.stats("1", 0, 3) == [0.2000000054637591]) assert(bw.stats("1", 0, 3, type="max") == [0.30000001192092896]) assert(bw.stats("1",99,200, type="max", nBins=2) == [1.399999976158142, 1.5]) assert(bw.stats("1",np.int64(99), np.int64(200), type="max", nBins=2) == [1.399999976158142, 1.5]) assert(bw.stats("1") == [1.3351851569281683]) def doValues(self, bw): assert(bw.values("1", 0, 3) == [0.10000000149011612, 0.20000000298023224, 0.30000001192092896]) assert(bw.values("1", np.int64(0), np.int64(3)) == [0.10000000149011612, 0.20000000298023224, 0.30000001192092896]) #assert(bw.values("1", 0, 4) == [0.10000000149011612, 0.20000000298023224, 0.30000001192092896, 'nan']) def doIntervals(self, bw): assert(bw.intervals("1", 0, 3) == ((0, 1, 0.10000000149011612), (1, 2, 0.20000000298023224), (2, 3, 0.30000001192092896))) 
assert(bw.intervals("1", np.int64(0), np.int64(3)) == ((0, 1, 0.10000000149011612), (1, 2, 0.20000000298023224), (2, 3, 0.30000001192092896))) assert(bw.intervals("1") == ((0, 1, 0.10000000149011612), (1, 2, 0.20000000298023224), (2, 3, 0.30000001192092896), (100, 150, 1.399999976158142), (150, 151, 1.5))) def doSum(self, bw): assert(bw.stats("1", 100, 151, type="sum", nBins=2) == [35.0, 36.5]) def doWrite(self, bw): ofile = tempfile.NamedTemporaryFile(delete=False) oname = ofile.name ofile.close() bw2 = pyBigWig.open(oname, "w") assert(bw2 is not None) #Since this is an unordered dict(), iterating over the items can swap the order! chroms = [("1", bw.chroms("1")), ("10", bw.chroms("10"))] assert(len(bw.chroms()) == 2) bw2.addHeader(chroms, maxZooms=1) #Copy the input file for c in chroms: ints = bw.intervals(c[0]) chroms2 = [] starts = [] ends = [] values = [] for entry in ints: chroms2.append(c[0]) starts.append(entry[0]) ends.append(entry[1]) values.append(entry[2]) bw2.addEntries(chroms2, starts, ends=ends, values=values) bw2.close() #Ensure that the copied file has the same entries and max/min/etc. bw2 = pyBigWig.open(oname) assert(bw.header() == bw2.header()) assert(bw.chroms() == bw2.chroms()) for c in chroms: ints1 = bw.intervals(c[0]) ints2 = bw2.intervals(c[0]) assert(ints1 == ints2) bw.close() bw2.close() #Clean up os.remove(oname) def doWrite2(self): ''' Test all three modes of storing entries. 
Also test to ensure that we get error messages when doing something silly This is a modified version of the writing example from libBigWig ''' chroms = ["1"]*6 starts = [0, 100, 125, 200, 220, 230, 500, 600, 625, 700, 800, 850] ends = [5, 120, 126, 205, 226, 231] values = [0.0, 1.0, 200.0, -2.0, 150.0, 25.0, 0.0, 1.0, 200.0, -2.0, 150.0, 25.0, -5.0, -20.0, 25.0, -5.0, -20.0, 25.0] ofile = tempfile.NamedTemporaryFile(delete=False) oname = ofile.name ofile.close() bw = pyBigWig.open(oname, "w") bw.addHeader([("1", 1000000), ("2", 1500000)]) #Intervals bw.addEntries(chroms[0:3], starts[0:3], ends=ends[0:3], values=values[0:3]) bw.addEntries(chroms[3:6], starts[3:6], ends=ends[3:6], values=values[3:6]) #IntervalSpans bw.addEntries("1", starts[6:9], values=values[6:9], span=20) bw.addEntries("1", starts[9:12], values=values[9:12], span=20) #IntervalSpanSteps, this should instead take an int bw.addEntries("1", 900, values=values[12:15], span=20, step=30) bw.addEntries("1", 990, values=values[15:18], span=20, step=30) #Attempt to add incorrect values. 
These MUST raise an exception try: bw.addEntries(chroms[0:3], starts[0:3], ends=ends[0:3], values=values[0:3]) assert(1==0) except RuntimeError: pass try: bw.addEntries("1", starts[6:9], values=values[6:9], span=20) assert(1==0) except RuntimeError: pass try: bw.addEntries("3", starts[6:9], values=values[6:9], span=20) assert(1==0) except RuntimeError: pass try: bw.addEntries("1", 900, values=values[12:15], span=20, step=30) assert(1==0) except RuntimeError: pass #Add a few intervals on a new chromosome bw.addEntries(["2"]*3, starts[0:3], ends=ends[0:3], values=values[0:3]) bw.close() #check md5sum, this is the simplest method to check correctness h = hashlib.md5(open(oname, "rb").read()).hexdigest() assert(h=="ef104f198c6ce8310acc149d0377fc16") #Clean up os.remove(oname) def doWriteEmpty(self): ofile = tempfile.NamedTemporaryFile(delete=False) oname = ofile.name ofile.close() bw = pyBigWig.open(oname, "w") bw.addHeader([("1", 1000000), ("2", 1500000)]) bw.close() #check md5sum h = hashlib.md5(open(oname, "rb").read()).hexdigest() assert(h=="361c600e5badf0b45d819552a7822937") #Ensure we can open and get reasonable results bw = pyBigWig.open(oname) assert(bw.chroms() == {'1': 1000000, '2': 1500000}) assert(bw.intervals("1") == None) assert(bw.values("1", 0, 1000000) == []) assert(bw.stats("1", 0, 1000000, nBins=2) == [None, None]) bw.close() #Clean up os.remove(oname) def doWriteNumpy(self): ofile = tempfile.NamedTemporaryFile(delete=False) oname = ofile.name ofile.close() bw = pyBigWig.open(oname, "w") bw.addHeader([("chr1", 100), ("chr2", 150), ("chr3", 200), ("chr4", 250)]) chroms = np.array(["chr1"] * 2 + ["chr2"] * 2 + ["chr3"] * 2 + ["chr4"] * 2) starts = np.array([0, 10, 40, 50, 60, 70, 80, 90], dtype=np.int64) ends = np.array([5, 15, 45, 55, 65, 75, 85, 95], dtype=np.int64) values0 = np.array(np.random.random_sample(8), dtype=np.float64) bw.addEntries(chroms, starts, ends=ends, values=values0) bw.close() vals = [(x, y, z) for x, y, z in zip(starts, ends, 
values0)] bw = pyBigWig.open(oname) assert(bw.chroms() == {'chr1': 100, 'chr2': 150, 'chr3': 200, 'chr4': 250}) for idx1, chrom in enumerate(["chr1", "chr2", "chr3", "chr4"]): for idx2, tup in enumerate(bw.intervals(chrom)): assert(tup[0] == starts[2 * idx1 + idx2]) assert(tup[1] == ends[2 * idx1 + idx2]) assert(np.isclose(tup[2], values0[2 * idx1 + idx2])) bw.close() #Clean up os.remove(oname) def testAll(self): bw = self.doOpen() self.doChroms(bw) if not self.fname.startswith("http"): self.doHeader(bw) self.doStats(bw) self.doSum(bw) self.doValues(bw) self.doIntervals(bw) self.doWrite(bw) self.doOpenWith() self.doWrite2() self.doWriteEmpty() self.doWriteNumpy() bw.close() class TestLocal(): def testFoo(self): blah = TestRemote() blah.fname = os.path.dirname(pyBigWig.__file__) + "/pyBigWigTest/test.bw" blah.testAll() class TestBigBed(): def testBigBed(self): fname = os.path.dirname(pyBigWig.__file__) + "/pyBigWigTest/test.bigBed" bb = pyBigWig.open(fname) assert(bb is not None) assert(bb.isBigWig() == 0) assert(bb.isBigBed() == 1) SQL = """table RnaElements "BED6 + 3 scores for RNA Elements data " ( string chrom; "Reference sequence chromosome or scaffold" uint chromStart; "Start position in chromosome" uint chromEnd; "End position in chromosome" string name; "Name of item" uint score; "Normalized score from 0-1000" char[1] strand; "+ or - or . for unknown" float level; "Expression level such as RPKM or FPKM. Set to -1 for no data." float signif; "Statistical significance such as IDR. Set to -1 for no data." uint score2; "Additional measurement/count e.g. number of reads. Set to 0 for no data." 
) """ output = bb.SQL() if isinstance(output, bytes): output = output.decode('ASCII') assert(output == SQL) o = bb.entries('chr1',10000000,10020000) expected = [(10009333, 10009640, '61035\t130\t-\t0.026\t0.42\t404'), (10014007, 10014289, '61047\t136\t-\t0.029\t0.42\t404'), (10014373, 10024307, '61048\t630\t-\t5.420\t0.00\t2672399')] assert(o == expected) o = bb.entries('chr1',np.int64(10000000),np.int64(10020000)) assert(o == expected) bb.close() class TestNumpy(): def testNumpy(self): import os if pyBigWig.numpy == 0: return 0 import numpy as np bw = pyBigWig.open("/tmp/delete.bw", "w") bw.addHeader([("1", 1000)], maxZooms=0) # Type 0 chroms = np.array(["1"] * 10) starts = np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90], dtype=np.int64) ends = np.array([5, 15, 25, 35, 45, 55, 65, 75, 85, 95], dtype=np.int64) values0 = np.array(np.random.random_sample(10), dtype=np.float64) bw.addEntries(chroms, starts, ends=ends, values=values0) starts = np.array([100, 110, 120, 130, 140, 150, 160, 170, 180, 190], dtype=np.int64) ends = np.array([105, 115, 125, 135, 145, 155, 165, 175, 185, 195], dtype=np.int64) values1 = np.array(np.random.random_sample(10), dtype=np.float64) bw.addEntries(chroms, starts, ends=ends, values=values1) # Type 1, single chrom, multiple starts/values, single span starts = np.array([200, 210, 220, 230, 240, 250, 260, 270, 280, 290], dtype=np.int64) values2 = np.array(np.random.random_sample(10), dtype=np.float64) bw.addEntries(np.str("1"), starts, span=np.int(8), values=values2) starts = np.array([300, 310, 320, 330, 340, 350, 360, 370, 380, 390], dtype=np.int64) values3 = np.array(np.random.random_sample(10), dtype=np.float64) bw.addEntries(np.str("1"), starts, span=np.int(8), values=values3) # Type 2, single chrom/start/span/step, multiple values values4 = np.array(np.random.random_sample(10), dtype=np.float64) bw.addEntries(np.str("1"), np.int(400), span=np.int(8), step=np.int64(2), values=values4) values5 = np.array(np.random.random_sample(10), 
dtype=np.float64) bw.addEntries(np.str("1"), np.int(500), span=np.int(8), step=np.int64(2), values=values5) bw.close() bw = pyBigWig.open("/tmp/delete.bw") assert(bw is not None) def compy(start, v2): v = [] for t in bw.intervals("1", start, start + 100): v.append(t[2]) v = np.array(v) assert(np.all(np.abs(v - v2) < 1e-5)) compy(0, values0) compy(100, values1) compy(200, values2) compy(300, values3) compy(400, values4) compy(500, values5) # Get values as a numpy array foo = bw.values("1", 0, 100, numpy=False) assert(isinstance(foo, list)) foo = bw.values("1", 0, 100, numpy=True) assert(isinstance(foo, np.ndarray)) bw.close() os.remove("/tmp/delete.bw") def testNumpyValues(self): if pyBigWig.numpy == 0: return 0 import numpy as np fname = "http://raw.githubusercontent.com/dpryan79/pyBigWig/master/pyBigWigTest/test.bw" bw = pyBigWig.open(fname, "r") assert np.allclose( bw.values("1", 0, 20, numpy=True), np.array(bw.values("1", 0, 20), dtype=np.float32), equal_nan=True ) assert np.allclose( bw.stats("1", 0, 20, "mean", 5, numpy=True), np.array(bw.stats("1", 0, 20, "mean", 5), dtype=np.float64), equal_nan=True ) pyBigWig-0.3.18/setup.cfg000066400000000000000000000000501400776027600151510ustar00rootroot00000000000000[metadata] description-file = README.md pyBigWig-0.3.18/setup.py000077500000000000000000000071631400776027600150610ustar00rootroot00000000000000#!/usr/bin/env python from setuptools import setup, Extension, find_packages from distutils import sysconfig import subprocess import glob import sys try: from numpy.distutils.misc_util import get_info from os.path import dirname WITHNUMPY = True except: WITHNUMPY = False srcs = [x for x in glob.glob("libBigWig/*.c")] srcs.append("pyBigWig.c") libs=["m", "z"] # do not link to python on mac, see https://github.com/deeptools/pyBigWig/issues/58 if 'dynamic_lookup' not in (sysconfig.get_config_var('LDSHARED') or ''): if sysconfig.get_config_vars('BLDLIBRARY') is not None: #Note the "-l" prefix! 
for e in sysconfig.get_config_vars('BLDLIBRARY')[0].split(): if e[0:2] == "-l": libs.append(e[2:]) elif sys.version_info[0] >= 3 and sys.version_info[1] >= 3: libs.append("python%i.%im" % (sys.version_info[0], sys.version_info[1])) else: libs.append("python%i.%i" % (sys.version_info[0], sys.version_info[1])) additional_libs = [sysconfig.get_config_var("LIBDIR"), sysconfig.get_config_var("LIBPL")] defines = [] try: foo, _ = subprocess.Popen(['curl-config', '--libs'], stdout=subprocess.PIPE).communicate() libs.append("curl") foo = foo.decode().strip().split() except: foo = [] defines.append(('NOCURL', None)) sys.stderr.write("Either libcurl isn't installed, it didn't come with curl-config, or curl-config isn't in your $PATH. pyBigWig will be installed without support for remote files.\n") for v in foo: if v[0:2] == '-L': additional_libs.append(v[2:]) include_dirs = ['libBigWig', sysconfig.get_config_var("INCLUDEPY")] if WITHNUMPY is True: defines.extend([('WITHNUMPY', None), ('NPY_NO_DEPRECATED_API', 'NPY_1_7_API_VERSION')]) extra_info = get_info('npymath') include_dirs.extend(extra_info['include_dirs']) libs.extend(extra_info['libraries']) extra_info['library_dirs'].extend(additional_libs) additional_libs = extra_info['library_dirs'] module1 = Extension('pyBigWig', sources = srcs, libraries = libs, library_dirs = additional_libs, define_macros = defines, include_dirs = include_dirs) setup(name = 'pyBigWig', version = '0.3.18', description = 'A package for accessing bigWig files using libBigWig', author = "Devon P. 
Ryan", author_email = "ryan@ie-freiburg.mpg.de", url = "https://github.com/dpryan79/pyBigWig", download_url = "https://github.com/dpryan79/pyBigWig/tarball/0.3.13", keywords = ["bioinformatics", "bigWig", "bigBed"], classifier = ["Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved", "Programming Language :: C", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: Implementation :: CPython", "Operating System :: POSIX", "Operating System :: Unix", "Operating System :: MacOS"], packages = find_packages(), include_package_data = True, extras_require = {'numpy input': ["numpy"]}, ext_modules = [module1])