pax_global_header00006660000000000000000000000064145676477730014544gustar00rootroot0000000000000052 comment=c9f482caf38501290890a919f6e157dcea171cda jluttine-truncnorm-c9f482c/000077500000000000000000000000001456764777300160535ustar00rootroot00000000000000jluttine-truncnorm-c9f482c/.github/000077500000000000000000000000001456764777300174135ustar00rootroot00000000000000jluttine-truncnorm-c9f482c/.github/workflows/000077500000000000000000000000001456764777300214505ustar00rootroot00000000000000jluttine-truncnorm-c9f482c/.github/workflows/release.yml000066400000000000000000000020521456764777300236120ustar00rootroot00000000000000name: Release on: create: tags: - '*' jobs: release-build: runs-on: ubuntu-latest environment: release steps: - name: Check out the repository uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v4 with: python-version: "3.x" - name: Install dependecies run: | pip install setuptools_scm - name: Build release distributions run: | python setup.py egg_info -Db '' sdist - name: Upload distributions uses: actions/upload-artifact@v4 with: name: release-dists path: dist/ pypi-publish: runs-on: ubuntu-latest environment: release needs: - release-build permissions: id-token: write steps: - name: Retrieve release distributions uses: actions/download-artifact@v4 with: name: release-dists path: dist/ - name: Publish release distributions to PyPI uses: pypa/gh-action-pypi-publish@release/v1 jluttine-truncnorm-c9f482c/.gitignore000066400000000000000000000001371456764777300200440ustar00rootroot00000000000000.envrc *~ *.pyc __pycache__ *.egg-info dist .hypothesis build .direnv *.log result _autosummaryjluttine-truncnorm-c9f482c/LICENSE000066400000000000000000000020721456764777300170610ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2019 Jaakko Luttinen Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. jluttine-truncnorm-c9f482c/README.md000066400000000000000000000027131456764777300173350ustar00rootroot00000000000000# TruncNorm Arbitrary order moments for truncated multivariate normal distributions. ## Introduction Given ``` X ~ N(m, C), a <= X <= b ``` with mean vector `m`, covariance matrix `C`, lower limit vector `a` and upper limit vector `b`, ``` python import truncnorm truncnorm.moments(m, C, a, b, 4) ``` returns all the following moments of total order less or equal to 4 as a list: ``` [ P(a<=X<=b), (scalar) E[X_i], (N vector) E[X_i*X_j], (NxN matrix) E[X_i*X_j*X_k], (NxNxN array) E[X_i*X_j*X_k*X_l], (NxNxNxN array) ] ``` for all `i`, `j`, `k` and `l`. Note that the first element in the list is a bit of a special case. That's because `E[1]` is trivially `1` so giving the normalisation constant instead is much more useful. ## TODO - Double truncation - Numerical stability could probably be increased by using logarithic scale in critical places of the algorithm - Sampling (see Gessner et al below) - Folded distribution - Optimize recurrent integrals by using vector and index-mapping representation instead of arrays. Using arrays makes computations efficient and simple, but same elements are computed multiple times because of symmetry in the moments. ## References - "On Moments of Folded and Truncated Multivariate Normal Distributions" by Raymond Kan & Cesare Robotti, 2016 - "Integrals over Gaussians under Linear Domain Constraints" by Alexandra Gessner & Oindrila Kanjilal & Philipp Hennig, 2020 jluttine-truncnorm-c9f482c/default.nix000066400000000000000000000005611456764777300202210ustar00rootroot00000000000000let pkgs = import { }; ps = pkgs.python3Packages; in ps.buildPythonPackage rec { name = "truncnorm"; doCheck = false; src = ./.; postShellHook = '' export PYTHONPATH=$(pwd):$PYTHONPATH ''; depsBuildBuild = with ps; [ ipython pip setuptools_scm pkgs.git ]; propagatedBuildInputs = with ps; [ numpy scipy ]; } jluttine-truncnorm-c9f482c/setup.py000066400000000000000000000032351456764777300175700ustar00rootroot00000000000000import os from setuptools import setup, find_packages if __name__ == "__main__": def read(fname): return open(os.path.join(os.path.dirname(__file__), fname)).read() meta = {} base_dir = os.path.dirname(os.path.abspath(__file__)) with open(os.path.join(base_dir, 'truncnorm', '_meta.py')) as fp: exec(fp.read(), meta) setup( name="truncnorm", author=meta["__author__"], author_email=meta["__contact__"], description="Moments for doubly truncated multivariate normal distributions", project_urls={ "Homepage": "https://github.com/jluttine/truncnorm", "Download": "https://pypi.org/project/truncnorm/", "Bug reports": "https://github.com/jluttine/truncnorm/issues", "Contributing": "https://github.com/jluttine/truncnorm/pulls", }, packages=find_packages(), use_scm_version=True, setup_requires=[ "setuptools_scm", ], install_requires=[ "numpy", "scipy", ], classifiers=[ "Programming Language :: Python :: 3 :: Only", "Development Status :: 3 - Alpha", "Environment :: Console", "Intended Audience :: Developers", "License :: OSI Approved :: {0}".format(meta["__license__"]), "Operating System :: OS Independent", "Programming Language :: Python :: 3 :: Only", "Topic :: Scientific/Engineering", "Topic :: Software Development :: Libraries", ], long_description=read('README.md'), long_description_content_type="text/markdown", ) jluttine-truncnorm-c9f482c/shell.nix000066400000000000000000000000151456764777300176760ustar00rootroot00000000000000(import ./.) jluttine-truncnorm-c9f482c/truncnorm/000077500000000000000000000000001456764777300201025ustar00rootroot00000000000000jluttine-truncnorm-c9f482c/truncnorm/__init__.py000066400000000000000000000000311456764777300222050ustar00rootroot00000000000000from .truncnorm import * jluttine-truncnorm-c9f482c/truncnorm/_meta.py000066400000000000000000000002211456764777300215340ustar00rootroot00000000000000__author__ = "Jaakko Luttinen" __contact__ = "jaakko.luttinen@iki.fi" __copyright__ = "2024, Jaakko Luttinen" __license__ = "MIT License" jluttine-truncnorm-c9f482c/truncnorm/truncnorm.py000066400000000000000000000220641456764777300225070ustar00rootroot00000000000000import itertools import numpy as np from scipy import stats def mvdot(A, b): return np.einsum("...ik,...k->...i", A, b) def diag(A): return np.einsum("...ii->...i", A) def integral(mu, Sigma, a, b): """P(a<=x<=b) for x~N(mu,Sigma) """ N = np.shape(mu)[-1] # If we don't have an upper bound, flip everything and use the lower bound # as the upper bound. if b is None: # Swap the axes b = None if a is None else -a a = None mu = -mu if a is None: if b is None: # Trivial integral return 1 if N == 1: return stats.norm.cdf(b, mu, Sigma[...,0])[...,0] # If only upper bound, we can compute P(x<=b) with a single evaluation # of CDF # # TODO: SciPy multivariate normal has no support for arrays, so we need # to loop over each ourselves sh = np.broadcast_shapes( np.shape(b)[:-1], np.shape(mu)[:-1], np.shape(Sigma)[:-2], ) N = np.shape(mu)[-1] b = np.broadcast_to(b, sh + (N,)) mu = np.broadcast_to(mu, sh + (N,)) Sigma = np.broadcast_to(Sigma, sh + (N,N)) p = np.zeros(sh) for ind in itertools.product(*(np.arange(s) for s in sh)): p[ind] = stats.multivariate_normal.cdf( b[ind], mean=mu[ind], cov=Sigma[ind], ) return p if N == 1: s = np.sqrt(Sigma)[...,0] p = stats.norm.cdf(b, mu, s) - stats.norm.cdf(a, mu, s) return p[...,0] raise NotImplementedError() # TODO: Broadcasting y = np.stack([a,b]) [ ((1, bi,),) if np.isneginf(ai) else ((0, ai), (1, bi)) for (ai, bi) in zip(a, b) ] # To get max benefit of the below optimization, flip dimensions for which # a[i]>-inf but b[i]=inf # flip = ~np.isneginf(a) & np.isposinf(b) # b = np.where(flip, -a, b) # a = np.where(flip, -np.inf, a) # mu = np.where(flip, -mu, mu) # Optimization: skip combinations with any a[i]=-inf [ ((1, bi,),) if np.isneginf(ai) else ((0, ai), (1, bi)) for (ai, bi) in zip(a, b) ] P = 0 for (i, y) in foo: # TODO: Maybe discard dimensions with b[i]=inf so it's lower dimensional? pass def _remove_diag(A): """Removes diagonal elements from a matrix""" # See: https://stackoverflow.com/a/46736275 return A[~np.eye(A.shape[0],dtype=bool)].reshape(A.shape[0],-1) def _remove_each_row_and_column(x): n = np.shape(x)[-1] inds = _remove_diag( np.repeat(np.arange(n)[None,:], n, axis=0) ) y = x[...,inds,:] return np.take_along_axis( y, np.broadcast_to( inds[...,None,:], np.shape(y)[:-1] + (n-1,), ), axis=-1, ) def _remove_each_column(x): n = np.shape(x)[-1] inds = _remove_diag( np.repeat(np.arange(n)[None,:], n, axis=0) ) return x[...,inds] def _remove_each_row(x): n = np.shape(x)[-1] inds = _remove_diag( np.repeat(np.arange(n)[None,:], n, axis=0) ) return x[...,inds,:] def _geometric_sum(x, a, b): """sum_a^{b-1} x**i""" z = 1 - x return np.where( a >= b, 0, np.where( z == 0, b - a, # case x==1 (x**a - x**b) / z, ) ) def _get_g(G, k, N, m): # We are interested in G[i][...] i = np.sum(k, axis=-1) # How many (N-1) length axes there are in total for all G[j] for j 0: def compute_lower_dimensional_integrals(y): Sigmaj = _remove_each_row(Sigma) Sigmajj = _remove_each_row_and_column(Sigma) muj = _remove_each_column(mu) # Shapes: # # Gs[0] :: (...) + () # Gs[1] :: (...) + (N-1) # Gs[2] :: (...) + (N-1,N-1) # Gs[3] :: (...) + (N-1,N-1,N-1) Gs = _recurrent_integrals( muj + np.einsum("...jij,...j->...ji", Sigmaj, (y - mu) / s2), Sigmajj - np.einsum("...jaj,...jbj->...jab", Sigmaj, Sigmaj) / s2[...,None,None], None if a is None else _remove_each_column(a), None if b is None else _remove_each_column(b), m - 1, ) # Put all in one huge vector for more efficient accessing sh = np.shape(Gs[0]) return np.concatenate( [np.reshape(Gi, sh + (-1,)) for Gi in Gs], axis=-1, ) Ga = ( None if a is None else compute_lower_dimensional_integrals(a) ) Gb = ( None if b is None else compute_lower_dimensional_integrals(b) ) # Compute the different total power integrals # # Shapes: # # Fs[0] :: (...) + () # Fs[1] :: (...) + (N,) # Fs[2] :: (...) + (N,N) # Fs[3] :: (...) + (N,N,N) # and so on Fs = [] Fs.append(np.asarray(integral(mu, Sigma, a, b))) al = a bl = b mul = mu Sigmal = Sigma stdl = np.sqrt(s2) k = np.zeros(N, dtype=int) I = np.eye(N, dtype=int) Il = I for l in range(1, m+1): c1 = ( np.zeros(N) if l < 2 else k * _get_f(Fs[l-2], k[...,None,:] - I, N, ndim=l-2) ) c2 = ( 0 if al is None else np.where( np.isneginf(al), 0, al**k * stats.norm.pdf(al, mul, stdl) * _get_g( Ga, _remove_each_column(k), N, m-1, ), ) ) c3 = ( 0 if bl is None else np.where( np.isposinf(bl), 0, bl**k * stats.norm.pdf(bl, mul, stdl) * _get_g( Gb, _remove_each_column(k), N, m-1, ), ) ) c = c1 + c2 - c3 F = mul * Fs[l-1][...,None] + mvdot(Sigmal, c) Fs.append(F) # Add new axes for the next iteration round al = (None if al is None else al[...,None,:]) bl = (None if bl is None else bl[...,None,:]) mul = mul[...,None,:] stdl = stdl[...,None,:] Sigmal = Sigmal[...,None,:,:] k = k + Il Il = Il[...,None,:] return Fs def moments(mu, Sigma, a, b, m): # Broadcast and convert to arrays # sh = np.broadcast_shapes( # np.shape(mu)[:-1], # np.shape(Sigma)[:-2], # () if a is None else np.shape(a)[:-1], # () if b is None else np.shape(b)[:-1], # ) # N = np.shape(mu)[-1] # mu = np.broadcast_to(mu, sh + (N,)) # Sigma = np.broadcast_to(Sigma, sh + (N,N)) # a = None if a is None else np.broadcast_to(a, sh + (N,)) # b = None if b is None else np.broadcast_to(b, sh + (N,)) Fs = _recurrent_integrals( np.asarray(mu), np.asarray(Sigma), None if a is None else np.asarray(a), None if b is None else np.asarray(b), m, ) L = Fs[0] # Treat the first element a bit differently by not dividing as it would give # trivial 1. But divide the other elements so you'll get the expectations. return [L] + [Fi / L for Fi in Fs[1:]]