pax_global_header00006660000000000000000000000064144175553000014516gustar00rootroot0000000000000052 comment=13b56b2731716acf33fb13087fb0469c5558b320 hmmlearn-0.3.0/000077500000000000000000000000001441755530000133215ustar00rootroot00000000000000hmmlearn-0.3.0/.codecov.yml000066400000000000000000000000171441755530000155420ustar00rootroot00000000000000comment: false hmmlearn-0.3.0/.github/000077500000000000000000000000001441755530000146615ustar00rootroot00000000000000hmmlearn-0.3.0/.github/ISSUE_TEMPLATE.md000066400000000000000000000021501441755530000173640ustar00rootroot00000000000000## I am requesting a new feature This library is currently not under active development. Large-scale features are unlikely to be added unless accompanied with a PR, which will be considered by the maintainers to the best of their ability. ## I am reporting a bug If you are reporting a bug, please - Make sure that it occurs with the latest release of hmmlearn (per https://pypi.org/project/hmmlearn/). - Provide a short, self-contained, correct example (http://sscce.org/). Maintainers must be able to reproduce the issue **by copy-pasting your example in a new, empty file/console/notebook**. Avoid including data files as much as possible; if they are absolutely necessary, they must be in csv or npy format, and code for loading the file must be included in the example. Issues not reproducible with the latest release of hmmlearn or without a short, self-contained, correct example **will be closed**. ## I have a question The maintainers cannot offer advice regarding the use of hmmlearn or of hidden Markov models in general. Consider asking on a general forum, e.g. https://stats.stackexchange.com/. hmmlearn-0.3.0/.github/workflows/000077500000000000000000000000001441755530000167165ustar00rootroot00000000000000hmmlearn-0.3.0/.github/workflows/build.yml000066400000000000000000000022151441755530000205400ustar00rootroot00000000000000name: build on: [push, pull_request] jobs: style: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Style run: | shopt -s globstar && ! grep -E '.{80}' **/*.py build: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] cibw-build: ['cp36-*', 'cp37-*', 'cp38-*', 'cp39-*', 'cp310-*', 'cp311-*'] fail-fast: false runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 - uses: docker/setup-qemu-action@v2 if: runner.os == 'Linux' - uses: pypa/cibuildwheel@v2.12.1 env: CIBW_ENVIRONMENT: SETUPTOOLS_SCM_PRETEND_VERSION=0.3.0 CIBW_BUILD: ${{ matrix.cibw-build }} CIBW_SKIP: '*-musllinux_*' CIBW_TEST_SKIP: '*-macosx_universal2:arm64' CIBW_ARCHS_LINUX: 'x86_64 aarch64' CIBW_ARCHS_MACOS: 'x86_64 universal2' CIBW_ARCHS_WINDOWS: 'AMD64' CIBW_TEST_REQUIRES: pytest CIBW_TEST_COMMAND: python -mpytest --pyargs hmmlearn.tests - uses: actions/upload-artifact@v3 with: name: wheels path: wheelhouse/*.whl if: ${{ always() }} hmmlearn-0.3.0/.gitignore000066400000000000000000000003071441755530000153110ustar00rootroot00000000000000*.egg-info/ .*.swp *.o *.pyc *.pyd *.so .cache/ .eggs/ .ipynb_checkpoints/ .pytest_cache/ build/ dist/ htmlcov/ oprofile_data/ .coverage .coverage.* .gdb_history # generated by doctest: filename.pkl hmmlearn-0.3.0/.readthedocs.yaml000066400000000000000000000001261441755530000165470ustar00rootroot00000000000000version: 2 python: install: - path: . 
extra_requirements: - docs hmmlearn-0.3.0/AUTHORS.rst000066400000000000000000000007661441755530000152110ustar00rootroot00000000000000Authors and contributors, in no particular order: * Ron Weiss * Shiqiao Du * Jaques Grobler * David Cournapeau * Fabian Pedregosa * Gael Varoquaux * Andreas Mueller * Bertrand Thirion * Daniel Nouri * Gilles Louppe * Jake Vanderplas * John Benediktsson * Lars Buitinck * Mikhail Korobov * Robert McGibbon * Stefano Lattarini * Vlad Niculae * csytracy * Alexandre Gramfort * Sergei Lebedev * Daniela Huppenkothen * Christopher Farrow * Alexandr Yanenko * Antony Lee * Matthew Danielson * Alex Rockhill hmmlearn-0.3.0/CHANGES.rst000066400000000000000000000146041441755530000151300ustar00rootroot00000000000000hmmlearn Changelog ================== Here you can see the full list of changes between each hmmlearn release. Version 0.3.0 ------------- Released on April 18th, 2023. - Introduce learning HMMs with Variational Inference. Support Gaussian and Categorical Emissions. This feature is provisional and subject to further changes. - Deprecated support for inputs of shape other than ``(n_samples, 1)`` for categorical HMMs. - Removed the deprecated ``iter_from_X_lengths`` and ``log_mask_zero``; ``lengths`` arrays that do not sum up to the entire array length are no longer supported. - Support variable ``n_trials`` in ``MultinomialHMM``, except for sampling. Version 0.2.8 ------------- Released on September 26th, 2022. - The ``PoissonHMM`` class was added with an example use case. - For ``MultinomialHMM``, parameters after ``transmat_prior`` are now keyword-only. - ``startmat_`` and ``transmat_`` will both be initialized with random variables drawn from a Dirichlet distribution, to maintain the old behavior, these must be initialized as ``1 / n_components``. - The old ``MultinomialHMM`` class was renamed to ``CategoricalHMM`` (as that's what it actually implements), and a new ``MultinomialHMM`` class was introduced (with a warning) that actually implements a multinomial distribution. - ``hmmlearn.utils.log_mask_zero`` has been deprecated. Version 0.2.7 ------------- Released on February 10th, 2022. - Dropped support for Py3.5 (due to the absence of manylinux wheel supporting both Py3.5 and Py3.10). - ``_BaseHMM`` has been promoted to public API and has been renamed to ``BaseHMM``. - MultinomialHMM no longer overwrites preset ``n_features``. - An implementation of the Forward-Backward algorithm based upon scaling is available by specifying ``implementation="scaling"`` when instantiating HMMs. In general, the scaling algorithm is more efficient than an implementation based upon logarithms. See `scripts/benchmark.py` for a comparison of the performance of the two implementations. - The *logprob* parameter to `.ConvergenceMonitor.report` has been renamed to *log_prob*. Version 0.2.6 ------------- Released on July 18th, 2021. - Fixed support for multi-sequence GMM-HMM fit. - Deprecated ``utils.iter_from_X_lengths``. - Previously, APIs taking a *lengths* parameter would silently drop the last samples if the total length was less than the number of samples. This behavior is deprecated and will raise an exception in the future. Version 0.2.5 ------------- Released on February 3rd, 2021. - Fixed typo in implementation of covariance maximization for GMMHMM. - Changed history of ConvergenceMonitor to include the whole history for evaluation purposes. It can no longer be assumed that it has a maximum length of two. Version 0.2.4 ------------- Released on September 12th, 2020. .. 
warning:: GMMHMM covariance maximization was incorrect in this release. This bug was fixed in the following release. - Bumped previously incorrect dependency bound on scipy to 0.19. - Bug fix for 'params' argument usage in GMMHMM. - Warn when an explicitly set attribute would be overridden by ``init_params_``. Version 0.2.3 ------------- Released on December 17th, 2019. Fitting of degenerate GMMHMMs appears to fail in certain cases on macOS; help with troubleshooting would be welcome. - Dropped support for Py2.7, Py3.4. - Log warning if not enough data is passed to fit() for a meaningful fit. - Better handle degenerate fits. - Allow missing observations in input multinomial data. - Avoid repeatedly rechecking validity of Gaussian covariance matrices. Version 0.2.2 ------------- Released on May 5th, 2019. This version was cut in particular in order to clear up the confusion between the "real" v0.2.1 and the pseudo-0.2.1 that were previously released by various third-party packagers. - Custom ConvergenceMonitors subclasses can be used (#218). - MultinomialHMM now accepts unsigned symbols (#258). - The ``get_stationary_distribution`` returns the stationary distribution of the transition matrix (i.e., the rescaled left-eigenvector of the transition matrix that is associated with the eigenvalue 1) (#141). Version 0.2.1 ------------- Released on October 17th, 2018. - GMMHMM was fully rewritten (#107). - Fixed underflow when dealing with logs. Thanks to @aubreyli. See PR #105 on GitHub. - Reduced worst-case memory consumption of the M-step from O(S^2 T) to O(S T). See issue #313 on GitHub. - Dropped support for Python 2.6. It is no longer supported by scikit-learn. Version 0.2.0 ------------- Released on March 1st, 2016. The release contains a known bug: fitting ``GMMHMM`` with covariance types other than ``"diag"`` does not work. This is going to be fixed in the following version. See issue #78 on GitHub for details. - Removed deprecated re-exports from ``hmmlean.hmm``. - Speed up forward-backward algorithms and Viterbi decoding by using Cython typed memoryviews. Thanks to @cfarrow. See PR#82 on GitHub. - Changed the API to accept multiple sequences via a single feature matrix ``X`` and an array of sequence ``lengths``. This allowed to use the HMMs as part of scikit-learn ``Pipeline``. The idea was shamelessly plugged from ``seqlearn`` package by @larsmans. See issue #29 on GitHub. - Removed ``params`` and ``init_params`` from internal methods. Accepting these as arguments was redundant and confusing, because both available as instance attributes. - Implemented ``ConvergenceMonitor``, a class for convergence diagnostics. The idea is due to @mvictor212. - Added support for non-fully connected architectures, e.g. left-right HMMs. Thanks to @matthiasplappert. See issue #33 and PR #38 on GitHub. - Fixed normalization of emission probabilities in ``MultinomialHMM``, see issue #19 on GitHub. - ``GaussianHMM`` is now initialized from all observations, see issue #1 on GitHub. - Changed the models to do input validation lazily as suggested by the scikit-learn guidelines. - Added ``min_covar`` parameter for controlling overfitting of ``GaussianHMM``, see issue #2 on GitHub. - Accelerated M-step fro `GaussianHMM` with full and tied covariances. See PR #97 on GitHub. Thanks to @anntzer. - Fixed M-step for ``GMMHMM``, which incorrectly expected ``GMM.score_samples`` to return log-probabilities. See PR #4 on GitHub for discussion. Thanks to @mvictor212 and @michcio1234. 
Version 0.1.1 ------------- Initial release, released on February 9th 2015. hmmlearn-0.3.0/LICENSE.txt000066400000000000000000000027671441755530000151600ustar00rootroot00000000000000Copyright (c) 2014, hmmlearn authors and contributors (see AUTHORS.rst) All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the {organization} nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. hmmlearn-0.3.0/README.rst000066400000000000000000000034401441755530000150110ustar00rootroot00000000000000hmmlearn ======== | |GitHub| |PyPI| | |Read the Docs| |Build| |CodeCov| .. |GitHub| image:: https://img.shields.io/badge/github-hmmlearn%2Fhmmlearn-brightgreen :target: https://github.com/hmmlearn/hmmlearn .. |PyPI| image:: https://img.shields.io/pypi/v/hmmlearn.svg?color=brightgreen :target: https://pypi.python.org/pypi/hmmlearn .. |Read the Docs| image:: https://readthedocs.org/projects/hmmlearn/badge/?version=latest :target: http://hmmlearn.readthedocs.io/en/latest/?badge=latest .. |Build| image:: https://img.shields.io/github/actions/workflow/status/hmmlearn/hmmlearn/build.yml?branch=main :target: https://github.com/hmmlearn/hmmlearn/actions .. |CodeCov| image:: https://img.shields.io/codecov/c/github/hmmlearn/hmmlearn :target: https://codecov.io/gh/hmmlearn/hmmlearn hmmlearn is a set of algorithms for **unsupervised** learning and inference of Hidden Markov Models. For supervised learning learning of HMMs and similar models see seqlearn_. .. _seqlearn: https://github.com/larsmans/seqlearn **Note**: This package is under limited-maintenance mode. Important links =============== * Official source code repo: https://github.com/hmmlearn/hmmlearn * HTML documentation (stable release): https://hmmlearn.readthedocs.org/en/stable * HTML documentation (development version): https://hmmlearn.readthedocs.org/en/latest Dependencies ============ The required dependencies to use hmmlearn are * Python >= 3.6 * NumPy >= 1.10 * scikit-learn >= 0.16 You also need Matplotlib >= 1.1.1 to run the examples and pytest >= 2.6.0 to run the tests. Installation ============ Requires a C compiler and Python headers. 
To install from PyPI:: pip install --upgrade --user hmmlearn To install from the repo:: pip install --user git+https://github.com/hmmlearn/hmmlearn hmmlearn-0.3.0/doc/000077500000000000000000000000001441755530000140665ustar00rootroot00000000000000hmmlearn-0.3.0/doc/.gitignore000066400000000000000000000000351441755530000160540ustar00rootroot00000000000000build/ source/auto_examples/ hmmlearn-0.3.0/doc/Makefile000066400000000000000000000011141441755530000155230ustar00rootroot00000000000000# Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = python -msphinx SOURCEDIR = source BUILDDIR = build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) hmmlearn-0.3.0/doc/make.bat000066400000000000000000000013561441755530000155000ustar00rootroot00000000000000@ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=python -msphinx ) set SOURCEDIR=source set BUILDDIR=build if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The Sphinx module was not found. Make sure you have Sphinx installed, echo.then set the SPHINXBUILD environment variable to point to the full echo.path of the 'sphinx-build' executable. Alternatively you may add the echo.Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% :end popd hmmlearn-0.3.0/doc/source/000077500000000000000000000000001441755530000153665ustar00rootroot00000000000000hmmlearn-0.3.0/doc/source/GMMHMM.rst000066400000000000000000000470001441755530000171030ustar00rootroot00000000000000We'll use Matrix Cookbook (https://www.math.uwaterloo.ca/~hwolkowi/matrixcookbook.pdf) for some useful equations. General form of expectation :math:`Q(\theta, \theta^{old})` of any GMM model looks like this (Bishop, (13.17)): .. math:: Q(\theta, \theta^{old}) = \sum_{k=1}^{K} \gamma(z_{1k})\ln \pi_k + \sum_{n=2}^{N} \sum_{j=1}^{K} \sum_{k=1}^{K} \xi (z_{n-1,j}, z_{nk}) \ln A_{jk} + \sum_{n=1}^{N} \sum_{k=1}^{K} \gamma(z_{nk}) \ln p(x_n | \phi_k) In the case of GMMHMM PDF in the last term looks like this: .. math:: p(x_n | \phi_k) = \sum_{l=1}^{L} \pi_{kl} \mathcal{N}(x_n | \mu_{kl}, \Sigma_{kl}) Thus: .. math:: Q(\theta, \theta^{old}) = \sum_{k=1}^{K} \gamma(z_{1k})\ln \pi_k + \sum_{n=2}^{N} \sum_{j=1}^{K} \sum_{k=1}^{K} \xi (z_{n-1,j}, z_{nk}) \ln A_{jk} + \sum_{n=1}^{N} \sum_{k=1}^{K} \gamma(z_{nk}) \sum_{l=1}^{L} \pi_{kl} \mathcal{N}(x_n | \mu_{kl}, \Sigma_{kl}) Priors for parameters :math:`\pi_{p}`: .. math:: p(\pi_p | \alpha_p) = \frac{1}{\text{B}(\alpha_p)} \prod_{l=1}^{L} \pi_{pl}^{\alpha_{pl} - 1} Priors for parameters :math:`\mu_{pt}`: .. math:: p(\mu_{pt} | \mu_{pt0}, \lambda) = \mathcal{N} (\mu_{pt} | \mu_{pt0}, \frac{1}{\lambda} \Sigma_{pt}) Priors for parameters :math:`\Sigma_{kl}` in 'full' case: .. 
math:: p(\Sigma_{pt} | \Psi_{pt}, \nu_{pt}) = \frac{\left|\Psi_{pt}\right|^{\frac{\nu_{pt}}{2}}}{2 ^ \frac{\nu_{pt} D} {2} \Gamma_D (\frac{\nu_{pt}}{2})} \left|\Sigma_{pt}\right|^{-\frac{\nu_{pt} + D + 1}{2}} e^{-\frac{1}{2} \text{tr} (\Psi_{pt} \Sigma_{pt}^{-1})} = \text{IW}(\Sigma_{pt} | \Psi_{pt}, \nu_{pt}) Priors for parameters :math:`\Sigma_{kl}` in 'tied' case: .. math:: p(\Sigma_p | \Psi_p, \nu_p) = \text{IW}(\Sigma_p | \Psi_p, \nu_p) Priors for parameters :math:`\sigma_{kld} ^ 2` in 'diag' case: .. math:: p(\sigma_{kld} ^ 2 | \alpha_{kld}, \beta_{kld}) = \frac { \beta_{kld} ^ {\alpha_{kld}} } { \Gamma (\alpha_{kld}) } (\sigma_{kld} ^ 2) ^ {-\alpha_{kld} - 1} \exp \Big( \frac {-\beta_{kld}} {\sigma_{kld}^2} \Big) = \Gamma^{-1}(\sigma_{kld} ^ 2 | \alpha_{kld}, \beta_{kld}) Priors for parameters :math:`\sigma_{kl} ^ 2` in 'spherical' case: .. math:: p(\sigma_{kl} ^ 2 | \alpha_{kl}, \beta_{kl}) = \Gamma^{-1}(\sigma_{kl} ^ 2 | \alpha_{kl}, \beta_{kl}) The whole prior log-distribution: .. math:: \ln p(\pi, \mu, \Sigma) = \sum_{p=1}^{P} \Big(\ln \frac{1}{\text{B}(\alpha_p)} + \sum_{l=1}^{L} (\alpha_{pl} - 1) \ln \pi_{pl}\Big) + \sum_{p=1}^{P} \sum_{l=1}^{L} \ln \mathcal{N} (\mu_{pt} | \mu_{pt0}, \frac{1}{\lambda} \Sigma_{pt}) + p(\Sigma) where :math:`p(\Sigma)` is the appropriate sum of one of four priors for covariances above. In 'full' case it is: .. math:: p(\Sigma) = \sum_{p=1}^{P} \sum_{l=1}^{L} \ln \text{IW}(\Sigma_{pt} | \Psi_{pt}, \nu_{pt}) In 'tied' case it is: .. math:: p(\Sigma) = \sum_{p=1}^{P} \ln \text{IW}(\Sigma_{p} | \Psi_p, \nu_p) In 'diag' case it is: .. math:: p(\Sigma) = \sum_{p=1}^{P} \sum_{l=1}^{L} \sum_{d=1}^{D} \Gamma^{-1}(\sigma_{kld} ^ 2 | \alpha_{kld}, \beta_{kld}) In 'spherical' case it is: .. math:: p(\Sigma) = \sum_{p=1}^{P} \sum_{l=1}^{L} \Gamma^{-1}(\sigma_{kl} ^ 2 | \alpha_{kl}, \beta_{kl}) Thus, in order to derive M-step for MAP-EM algorithm, we should maximize :math:`Q(\theta, \theta^{\text{ old }}) + \ln p(\theta)` w. r. t. :math:`\theta`. Let's maximize :math:`Q(\theta, \theta^{\text{ old }}) + \ln p(\theta)` w. r. t. some :math:`\pi_{pt}`. These values should satisfy :math:`\sum_{l=1}^{L} \pi_{pl} = 1` :math:`\forall p`. Taking this into account, we maximize :math:`Q(\theta, \theta^{\text{ old }}) + \ln p(\theta)` using Lagrange multiplier and maximizing the following value: .. math:: Q(\theta, \theta^{\text{ old }}) + \ln p(\theta) + \sum_{p=1}^{P} \lambda_p (\sum_{l=1}^{L} \pi_{pl} - 1) Deriving :math:`Q(\theta, \theta^{\text{ old }})` by :math:`\pi_{pt}`: .. math:: \frac{\partial Q(\theta, \theta^{\text{ old }})}{\partial \pi_{pt}} = \sum_{n=1}^{N} \gamma(z_{np}) \frac {\mathcal{N} (x_n | \mu_{ pt }, \Sigma_{pt})} {\sum_l \pi_{pl} \mathcal{N} (x_n | \mu_{pl}, \Sigma_{pl})} Deriving :math:`\ln p(\theta)` by by :math:`\pi_{pt}`: .. math:: \frac{\partial \ln p(\theta)}{\partial \pi_{pt}} = \frac{\alpha_{pt} - 1}{\pi_{pt}} Deriving :math:`\sum_{p=1}^{P} \lambda_p (\sum_{l=1}^{L} \pi_{pl} - 1)` by :math:`\pi_{pt}`: .. math:: \frac {\partial \sum_{p=1}^{P} \lambda_p (\sum_{l=1}^{L} \pi_{pl} - 1)} {\partial \pi_{pt}} = \lambda_p Final result; .. math:: \frac {\partial (Q(\theta, \theta^{\text{ old }}) + \ln p(\theta) + \sum_{p=1}^{P} \lambda_p (\sum_{l=1}^{L} \pi_{pl} - 1))} {\partial \pi_{pt}} = \sum_{n=1}^{N} \gamma(z_{np}) \frac {\mathcal{N} (x_n | \mu_{ pt }, \Sigma_{pt})} {\sum_l \pi_{pl} \mathcal{N} (x_n | \mu_{pl}, \Sigma_{pl})} + \frac{\alpha_{pt} - 1}{\pi_{pt}} + \lambda_p = 0 Multiplying by :math:`\pi_{pt}` and summing over *t* we get: .. 
math:: \sum_{n=1}^{N} \gamma(z_{np}) \frac {\sum_l \pi_{pl} \mathcal{N} (x_n | \mu_{ pt }, \Sigma_{pt})} {\sum_l \pi_{pl} \mathcal{N} (x_n | \mu_{pl}, \Sigma_{pl})} + \sum_{l=1}^{L} (\alpha_{pl} - 1) + \sum_{l=1}^{L} \pi_{pl} \lambda_p = 0 From which we get: .. math:: \sum_{n=1}^{N} \gamma(z_{np}) + \sum_{l=1}^{L} (\alpha_{pl} - 1) + \lambda_p = 0 \lambda_p = -\sum_{n=1}^{N} \gamma(z_{np}) - \sum_{l=1}^{L} (\alpha_{pl} - 1) Substituting the result for :math:`\lambda_p` into the original expression: .. math:: \sum_{n=1}^{N} \gamma(z_{np}) \frac {\mathcal{N} (x_n | \mu_{ pt }, \Sigma_{pt})} {\sum_l \pi_{pl} \mathcal{N} (x_n | \mu_{pl}, \Sigma_{pl})} + \frac{\alpha_{pt} - 1}{\pi_{pt}} = \sum_{n=1}^{N} \gamma(z_{np}) + \sum_{l=1}^{L} (\alpha_{pl} - 1) \sum_{n=1}^{N} \gamma(z_{np}) \frac {\pi_{pt} \mathcal{N} (x_n | \mu_{ pt }, \Sigma_{pt})} {\sum_l \pi_{pl} \mathcal{N} (x_n | \mu_{pl}, \Sigma_{pl})} + \alpha_{pt} - 1 = \pi_{pt} \Big(\sum_{n=1}^{N} \gamma(z_{np}) + \sum_{l=1}^{L} (\alpha_{pl} - 1)\Big) \frac{\sum_{n=1}^{N} \gamma(z_{np}) \frac {\pi_{pt} \mathcal{N} (x_n | \mu_{ pt }, \Sigma_{pt})} {\sum_l \pi_{pl} \mathcal{N} (x_n | \mu_{pl}, \Sigma_{pl})} + \alpha_{pt} - 1} { \sum_{n=1}^{N} \gamma(z_{np}) + \sum_{l=1}^{L} (\alpha_{pl} - 1)} = \pi_{pt} Let's introduce a few notations: .. math:: \frac {\pi_{pt} \mathcal{N} (x_n | \mu_{ pt }, \Sigma_{pt})} {\sum_l \pi_{pl} \mathcal{N} (x_n | \mu_{pl}, \Sigma_{pl})} = \gamma(\tilde{z}_{npt}) \sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) = N_{pt} \sum_{n=1}^{N} \gamma(z_{np}) = N_p Then the expression for maximizing the :math:`\pi_{pt}` is as follows: .. math:: \pi_{pt} = \frac{N_{pt} + \alpha_{pt} - 1} {N_p + \sum_{l=1}^{L} (\alpha_{pl} - 1)} Let's do the same with :math:`\mu_{pt}`. This time, there aren't any constraints, so the task of maximizing :math:`Q(\theta, \theta^{\text{ old }}) + \ln p(\theta)` reduces to finding partial derivative of this function w. r. t. :math:`\mu_{pt}` and equating it to zero. First, let's derivate :math:`\ln p(\theta)` using formula (85) from Matrix Cookbook: .. math:: \frac {\partial \ln p(\theta)} {\mu_{pt}} = \frac {\partial (\ln \mathcal{N} (\mu_{pt} | \mu_{pt0}, \frac{1}{\lambda_{pt}} \Sigma_{pt}))} {\partial \mu_{pt}} = \frac {\frac {\partial (\mathcal{N} (\mu_{pt} | \mu_{pt0}, \frac{1}{\lambda_{pt}} \Sigma_{pt}))} {\partial \mu_{pt}}} {\mathcal{N} (\mu_{pt} | \mu_{pt0}, \frac{1}{\lambda_{pt}} \Sigma_{pt})} \frac {\partial (\mathcal{N} (\mu_{pt} | \mu_{pt0}, \frac{1}{\lambda_{pt}} \Sigma_{pt}))} {\partial \mu_{pt}} = \frac {\partial \Big(\frac{1} {(2 \pi)^{D/2}} \frac {\sqrt{\lambda_{pt}}} {\left|\Sigma_{pt}\right|^{1/2}}\exp \left \{ -\frac {\lambda_{pt}} {2} (\mu_{pt} - \mu_{pt0})^T \Sigma_{pt}^{-1} (\mu_{pt} - \mu_{pt0}) \right \}\Big)} {\partial \mu_{pt}} = \mathcal{N} (\mu_{pt} | \mu_{pt0}, \frac{1}{\lambda_{pt}} \Sigma_{pt}) \frac {\partial (-\frac {\lambda_{pt}} {2} (\mu_{pt} - \mu_{pt0})^T \Sigma_{pt}^{-1} (\mu_{pt} - \mu_{pt0}))} {\partial \mu_{pt}} = \mathcal{N} (\mu_{pt} | \mu_{pt0}, \frac{1}{\lambda_{pt}} \Sigma_{pt}) \Big(-\frac {\lambda_{pt}} {2}\Big) 2 \Sigma_{pt}^{-1} (\mu_{pt} - \mu_{pt0}) = -\lambda_{pt} \Sigma_{pt}^{-1} (\mu_{pt} - \mu_{pt0}) \mathcal{N} (\mu_{pt} | \mu_{pt0}, \frac{1}{\lambda_{pt}} \Sigma_{pt}) \frac {\partial \ln p(\theta)} {\mu_{pt}} = -\lambda_{pt} \Sigma_{pt}^{-1} (\mu_{pt} - \mu_{pt0}) Then, let's derivate :math:`Q(\theta, \theta^{\text{old}})` using formula (86): .. 
math:: \frac{\partial Q(\theta, \theta^{\text{ old }})}{\partial \mu_{pt}} = \sum_{n=1}^{N} \gamma(z_{np}) \frac {\pi_{pt} \mathcal{N} (x_n | \mu_pt, \Sigma_pt)} {\sum_l \pi_{pl} \mathcal{N} (x_n | \mu_pl, \Sigma_pl)} \Sigma_{pt}^{-1} (x_n - \mu_{pt}) = \sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) \Sigma_{pt}^{-1} (x_n - \mu_{pt}) Then, let's derivate :math:`\ln p(\theta)`: .. math:: \frac {\partial \ln p(\theta)} {\mu_{pt}} = \frac {\partial (\ln \mathcal{N} (\mu_{pt} | \mu_{pt0}, \frac{1}{\lambda_{pt}} \Sigma_{pt}))} {\partial \mu_{pt}} = \frac {\frac {\partial (\mathcal{N} (\mu_{pt} | \mu_{pt0}, \frac{1}{\lambda_{pt}} \Sigma_{pt}))} {\partial \mu_{pt}}} {\mathcal{N} (\mu_{pt} | \mu_{pt0}, \frac{1}{\lambda_{pt}} \Sigma_{pt})} \frac {\partial (\mathcal{N} (\mu_{pt} | \mu_{pt0}, \frac{1}{\lambda_{pt}} \Sigma_{pt}))} {\partial \mu_{pt}} = \frac {\partial \Big(\frac{1} {(2 \pi)^{D/2}} \frac {\sqrt{\lambda_{pt}}} {\left|\Sigma_{pt}\right|^{1/2}}\exp \left \{ -\frac {\lambda_{pt}} {2} (\mu_{pt} - \mu_{pt0})^T \Sigma_{pt}^{-1} (\mu_{pt} - \mu_{pt0}) \right \}\Big)} {\partial \mu_{pt}} = \mathcal{N} (\mu_{pt} | \mu_{pt0}, \frac{1}{\lambda_{pt}} \Sigma_{pt}) \frac {\partial (-\frac {\lambda_{pt}} {2} (\mu_{pt} - \mu_{pt0})^T \Sigma_{pt}^{-1} (\mu_{pt} - \mu_{pt0}))} {\partial \mu_{pt}} = \mathcal{N} (\mu_{pt} | \mu_{pt0}, \frac{1}{\lambda_{pt}} \Sigma_{pt}) \Big(-\frac {\lambda_{pt}} {2}\Big) 2 \Sigma_{pt}^{-1} (\mu_{pt} - \mu_{pt0}) = -\lambda_{pt} \Sigma_{pt}^{-1} (\mu_{pt} - \mu_{pt0}) \mathcal{N} (\mu_{pt} | \mu_{pt0}, \frac{1}{\lambda_{pt}} \Sigma_{pt}) \frac {\partial \ln p(\theta)} {\mu_{pt}} = -\lambda_{pt} \Sigma_{pt}^{-1} (\mu_{pt} - \mu_{pt0}) Now, the result is: .. math:: \frac {\partial (Q(\theta, \theta^{\text{ old }}) + \ln p(\theta))} {\partial \mu_{pt}} = \sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) \Sigma_{pt}^{-1} (x_n - \mu_{pt}) - \lambda_{pt} \Sigma_{pt}^{-1} (\mu_{pt} - \mu_{pt0}) = 0 \sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) (x_n - \mu_{pt}) - \lambda_{pt} (\mu_{pt} - \mu_{pt0}) = 0 \sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) x_n - \mu_{pt}\sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) - \lambda_{pt} \mu_{pt} + \lambda_{pt}\mu_{pt0} = 0 \sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) x_n - \mu_{pt} N_{pt} - \lambda_{pt} \mu_{pt} + \lambda_{pt}\mu_{pt0} = 0 \sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) x_n + \lambda_{pt}\mu_{pt0} = \mu_{pt} (N_{pt} + \lambda_{pt}) \mu_{pt} = \frac {\sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) x_n + \lambda_{pt}\mu_{pt0}} {N_{pt} + \lambda_{pt}} Basically all the same with :math:`\Sigma`, but with 4 different variants of it, for full, tied, diagonal and spherical covariance. Let's start with 'full'. We're trying to find :math:`\Sigma_{pt}`. First, derivative of :math:`Q(\theta, \theta^{\text{ old }})`: .. 
math:: \frac {\partial Q(\theta, \theta^{\text{old}})} {\partial \Sigma_{pt}} = \sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) \frac {\frac {\partial \mathcal{N} (x_n | \mu_{pt}, \Sigma_{pt})} {\partial \Sigma_{pt}}} {\mathcal{N} (x_n | \mu_{pt}, \Sigma_{pt})} \frac {\partial \mathcal{N} (x | \mu, \Sigma)} {\partial \Sigma} = \frac {\partial \Big(\frac{1} {(2 \pi)^{D/2}} \frac {1} {\left|\Sigma\right|^{1/2}}\exp \left \{ -\frac {1} {2} (x - \mu)^T \Sigma^{-1} (x - \mu) \right \}\Big)} {\partial \Sigma} = = \frac{1} {(2 \pi)^{D/2}} \frac {\partial \left|\Sigma\right| ^ {-\frac{1}{2}}} {\partial \Sigma} \exp \{\cdots\} + \frac{1} {(2 \pi)^{D/2}} \frac {1} {\left|\Sigma\right|^{1/2}} \frac {\partial \exp \{\cdots\}} {\partial \Sigma} Using the chain rule and formula (49) from Matrix Cookbook to find the derivative of determinant, for the first term we get: .. math:: \frac {\partial \left|\Sigma\right| ^ {-\frac{1}{2}}} {\partial \Sigma} = \frac {\partial \left|\Sigma\right| ^ {-\frac{1}{2}}} {\partial \left|\Sigma\right|} \frac {\partial \left|\Sigma\right|} {\partial \Sigma} = - \frac {1} {2} \left|\Sigma\right| ^ {-\frac{3}{2}} \left|\Sigma\right| \Sigma^{-T} = - \frac {1} {2} \frac {1} {\left|\Sigma\right| ^ {1/2}} \Sigma^{-T} Using the chain rule and formula (61) from Matrix Cookbook to find the derivative of inverse, for the second term we get: .. math:: \frac {\partial \exp \{\cdots\}} {\partial \Sigma} = \frac {\partial \exp \{\cdots\}} {\{\cdots\}} \frac {\{\cdots\}} {\partial \Sigma} = \exp \{\cdots\} \Big(-\frac {1} {2} \Big) \frac {\partial \Big((x - \mu)^T \Sigma^{-1} (x - \mu) \Big)} {\partial \Sigma} = = \exp \{\cdots\} \Big(-\frac {1} {2} \Big) (-\Sigma^{-T} (x - \mu) (x - \mu)^T \Sigma^{-T}) Combining the two: .. math:: \frac {\partial \mathcal{N} (x | \mu, \Sigma)} {\partial \Sigma} = \frac{1} {(2 \pi)^{D/2}} \frac {1} {\left|\Sigma\right| ^ {1/2}} \exp \{\cdots\} \Big(-\frac {1} {2}\Big) \Sigma^{-T} + \frac{1} {(2 \pi)^{D/2}} \frac {1} {\left|\Sigma\right|^{1/2}} \exp \{\cdots\} \Big(-\frac {1} {2} \Big) (-\Sigma^{-T} (x - \mu) (x - \mu)^T \Sigma^{-T}) = = \mathcal{N} (x | \mu, \Sigma)\Big(-\frac {1} {2}\Big) \Sigma^{-T} + \mathcal{N} (x | \mu, \Sigma) \frac {1} {2} (\Sigma^{-T} (x - \mu) (x - \mu)^T \Sigma^{-T}) From which we finally get: .. math:: \frac {\partial Q(\theta, \theta^{\text{old}})} {\partial \Sigma_{pt}} = \sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) \Big( \Big(-\frac {1} {2}\Big) \Sigma_{pt}^{-T} + \frac {1} {2} (\Sigma_{pt}^{-T} (x_n - \mu_{pt}) (x_n - \mu_{pt})^T \Sigma_{pt}^{-T}) \Big) = = \Big(-\frac {1} {2}\Big) \Sigma_{pt}^{-T} \sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) + \frac {1} {2} \Sigma_{pt}^{-T} (\sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) (x_n - \mu_{pt}) (x_n - \mu_{pt})^T) \Sigma_{pt}^{-T} = = \Big(-\frac {1} {2}\Big) \Sigma_{pt}^{-T} N_{pt} + \frac {1} {2} \Sigma_{pt}^{-T} \big(\sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) (x_n - \mu_{pt}) (x_n - \mu_{pt})^T\big) \Sigma_{pt}^{-T} Now to :math:`\ln p(\theta)`: .. 
math:: \frac {\partial \ln p(\theta)} {\partial \Sigma_{pt}} = \frac {\frac {\partial \mathcal {N} (\mu_{pt} | \mu_{pt0}, \frac {1} {\lambda_pt} \Sigma_{pt})} {\partial \Sigma_{pt}}} {\mathcal {N} (\mu_{pt} | \mu_{pt0}, \frac {1} {\lambda_pt} \Sigma_{pt})} + \frac {\frac {\partial \text {IW} (\Sigma_{pt} | \Psi_{pt}, \nu_{pt})} {\partial \Sigma_{pt}}} {\text {IW} (\Sigma_{pt} | \Psi_{pt}, \nu_{pt})} We can calculate the derivative of normal distribution in the equation above using previous results: .. math:: \frac {\partial \mathcal {N} (x | \mu, \frac {1} {\lambda} \Sigma)} {\partial \Sigma} = \frac {\partial \Big(\frac{1} {(2 \pi)^{D/2}} \frac {\sqrt{\lambda}} {\left|\Sigma\right|^{1/2}} \exp \left \{ -\frac {\lambda} {2} (x - \mu)^T \Sigma^{-1} (x - \mu) \right \}\Big)} {\partial \Sigma} = \frac{\sqrt{\lambda}} {(2 \pi)^{D/2}} \frac {\partial \left|\Sigma\right| ^ {-\frac{1}{2}}} {\partial \Sigma} \exp \{\cdots\} + \frac{1} {(2 \pi)^{D/2}} \frac {\sqrt{\lambda}} {\left|\Sigma\right|^{1/2}} \frac {\partial \exp \{\cdots\}} {\partial \Sigma} = \mathcal{N} (x | \mu, \frac {1} {\lambda} \Sigma)\Big(-\frac {1} {2}\Big) \Sigma^{-T} + \mathcal{N} (x | \mu, \frac {1} {\lambda} \Sigma) \frac {\lambda} {2} \Sigma^{-T} (x - \mu) (x - \mu)^T \Sigma^{-T} Now to the derivative of inverse-Wishart distribution: .. math:: \frac {\partial \text {IW} (\Sigma | \Psi, \nu)} {\partial \Sigma} = \frac {\partial \Big( \frac{\left|\Psi\right|^{\frac{\nu}{2}}}{2 ^ \frac{\nu D} {2} \Gamma_D (\frac{\nu}{2})} \left|\Sigma\right|^{-\frac{\nu + D + 1}{2}} \exp \left \{-\frac{1}{2} \text{tr} (\Psi \Sigma^{-1}) \right \} \Big)} {\partial \Sigma} = \frac{ \left| \Psi \right| ^ {\frac {\nu} {2} } } {2 ^ \frac{\nu D} {2} \Gamma_D (\frac{\nu}{2})} \frac {\partial \left|\Sigma\right|^{-\frac{\nu + D + 1}{2}} } {\partial \Sigma} \exp \left \{-\frac{1}{2} \text{tr} (\Psi \Sigma^{-1}) \right \} + \frac{\left|\Psi\right|^{\frac{\nu}{2}}}{2 ^ \frac{\nu D} {2} \Gamma_D (\frac{\nu}{2})} \left|\Sigma\right|^{-\frac{\nu + D + 1}{2}} \exp \left \{-\frac{1}{2} \text{tr} (\Psi \Sigma^{-1}) \right \} \Big( - \frac {1} {2} \Big) \frac {\partial \text{tr} (\Psi \Sigma^{-1})} {\partial \Sigma} Using the same equation (49) from Matrix Cookbook, we get: .. math:: \frac {\partial \left|\Sigma\right|^{-\frac{\nu + D + 1}{2}} } {\partial \Sigma} = \frac {\partial \left|\Sigma\right|^{-\frac{\nu + D + 1}{2}} } {\partial \left| \Sigma \right|} \frac {\partial \left| \Sigma \right|} {\partial \Sigma} = - \frac {(\nu + D + 1)} {2} \left| \Sigma \right| ^ {-\frac{\nu + D + 1}{2}} \Sigma^{-1} \frac {\partial \left| \Sigma \right|} {\partial \Sigma} = -\frac {(\nu + D + 1)} {2} \left| \Sigma \right| ^ {-\frac{\nu + D + 1}{2}} \Sigma^{-1} \left| \Sigma \right| \Sigma^{-T} = = -\frac {(\nu + D + 1)} {2} \Sigma^{-T} \left| \Sigma \right| ^ {-\frac{\nu + D + 1}{2}} Using formula (63), for the derivative of a trace we get: .. math:: \frac {\partial \text{tr} (\Psi \Sigma^{-1})} {\partial \Sigma} = -\Sigma^{-T} \Psi^T \Sigma^{-T} Combining the two, we get: .. math:: \frac {\partial \text {IW} (\Sigma | \Psi, \nu)} {\partial \Sigma} = -\frac {(\nu + D + 1)} {2} \Sigma^{-T} \text {IW} (\Sigma | \Psi, \nu) + \frac {1} {2} \Sigma^{-T} \Psi^T \Sigma^{-T} \text {IW} (\Sigma | \Psi, \nu) Now, finally, we can get the whole derivative of prior distribution w. r. t. :math:`\Sigma_{pt}`: .. 
math:: \frac {\partial \ln p(\theta)} {\partial \Sigma_{pt}} = \Big(-\frac {1} {2}\Big) \Sigma_{pt}^{-T} + \frac {\lambda_{pt}} {2} \Sigma_{pt}^{-T} (\mu_{pt} - \mu_{pt0}) (\mu_{pt} - \mu_{pt0})^T \Sigma_{pt}^{-T} + \Big(-\frac {(\nu_{pt} + D + 1)} {2}\Big) \Sigma_{pt}^{-T} + \frac {1} {2} \Sigma_{pt}^{-T} \Psi_{pt}^T \Sigma_{pt}^{-T} Then, we can equate the derivative of :math:`Q(\theta, \theta ^ {\text{old}}) + \ln (\theta)` to 0: .. math:: \frac {\partial (Q(\theta, \theta ^ {\text{old}}) + \ln (\theta))} {\partial \Sigma_{pt}} = \Big(-\frac {1} {2}\Big) \Sigma_{pt}^{-T} N_{pt} + \frac {1} {2} \Sigma_{pt}^{-T} \big(\sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) (x_n - \mu_{pt}) (x_n - \mu_{pt})^T\big) \Sigma_{pt}^{-T} + + \Big(-\frac {1} {2}\Big) \Sigma_{pt}^{-T} + \frac {\lambda_{pt}} {2} \Sigma_{pt}^{-T} (\mu_{pt} - \mu_{pt0}) (\mu_{pt} - \mu_{pt0})^T \Sigma_{pt}^{-T} + \Big(-\frac {(\nu_{pt} + D + 1)} {2}\Big) \Sigma_{pt}^{-T} + \frac {1} {2} \Sigma_{pt}^{-T} \Psi_{pt}^T \Sigma_{pt}^{-T} = 0 Multiplying by :math:`2 \Sigma^{T}` from both sides, we get: .. math:: -\Sigma_{pt}^{T} N_{pt} + \sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) (x_n - \mu_{pt}) (x_n - \mu_{pt})^T - \Sigma_{pt}^{T} + \lambda_{pt} (\mu_{pt} - \mu_{pt0}) (\mu_{pt} - \mu_{pt0})^T - (\nu_{pt} + D + 1) \Sigma_{pt}^{T} + \Psi_{pt}^T = 0 Let's, once again, introduce a few notations: .. math:: C_{npt} = (x_n - \mu_{pt}) (x_n - \mu_{pt})^T C_{\mu_{pt}} = (\mu_{pt} - \mu_{pt0}) (\mu_{pt} - \mu_{pt0})^T Let's rewrite the expression above using these notations: .. math:: -\Sigma_{pt}^{T} N_{pt} + \sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) C_{npt} - \Sigma_{pt}^{T} + \lambda_{pt} C_{\mu_{pt}} - (\nu_{pt} + D + 1) \Sigma_{pt}^{T} + \Psi_{pt}^T = 0 \sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) C_{npt} + \lambda_{pt} C_{\mu_{pt}} + \Psi_{pt}^T = \Sigma_{pt}^{T} (N_{pt} + 1 + (\nu_{pt} + D + 1)) \Sigma_{pt}^T = \Sigma_{pt} = \frac {\sum_{n=1}^{N} \gamma(z_{np}) \gamma(\tilde{z}_{npt}) C_{npt} + \lambda_{pt} C_{\mu_{pt}} + \Psi_{pt}^T} {N_{pt} + 1 + \nu_{pt} + D + 1} hmmlearn-0.3.0/doc/source/_static/000077500000000000000000000000001441755530000170145ustar00rootroot00000000000000hmmlearn-0.3.0/doc/source/_static/hide_some_gallery_elements.css000066400000000000000000000001241441755530000250720ustar00rootroot00000000000000div.sphx-glr-download-link-note, div.sphx-glr-download-jupyter { display: none; } hmmlearn-0.3.0/doc/source/api.rst000066400000000000000000000034561441755530000167010ustar00rootroot00000000000000API Reference ============= This is the class and function reference of ``hmmlearn``. Please refer to the :ref:`full user guide ` for further details, as the class and function raw specifications may not be enough to give full guidelines on their uses. hmmlearn.base ------------- ConvergenceMonitor ~~~~~~~~~~~~~~~~~~ .. autoclass:: hmmlearn.base.ConvergenceMonitor _AbstractHMM ~~~~~~~~~~~~ .. autoclass:: hmmlearn.base._AbstractHMM :exclude-members: set_params, get_params, _get_param_names :private-members: :no-inherited-members: BaseHMM ~~~~~~~ .. autoclass:: hmmlearn.base.BaseHMM :exclude-members: set_params, get_params, _get_param_names :private-members: :no-inherited-members: VariationalBaseHMM ~~~~~~~~~~~~~~~~~~ .. autoclass:: hmmlearn.base.VariationalBaseHMM :exclude-members: set_params, get_params, _get_param_names :private-members: :no-inherited-members: hmmlearn.hmm ------------ GaussianHMM ~~~~~~~~~~~ .. 
autoclass:: hmmlearn.hmm.GaussianHMM :exclude-members: covars_, set_params, get_params GMMHMM ~~~~~~ .. autoclass:: hmmlearn.hmm.GMMHMM :exclude-members: set_params, get_params MultinomialHMM ~~~~~~~~~~~~~~ .. autoclass:: hmmlearn.hmm.MultinomialHMM :exclude-members: set_params, get_params CategoricalHMM ~~~~~~~~~~~~~~ .. autoclass:: hmmlearn.hmm.CategoricalHMM :exclude-members: set_params, get_params PoissonHMM ~~~~~~~~~~ .. autoclass:: hmmlearn.hmm.PoissonHMM :exclude-members: set_params, get_params hmmlearn.vhmm ------------- VariationalCategoricalHMM ~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: hmmlearn.vhmm.VariationalCategoricalHMM :exclude-members: set_params, get_params VariationalGaussianHMM ~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: hmmlearn.vhmm.VariationalGaussianHMM :exclude-members: set_params, get_params hmmlearn-0.3.0/doc/source/changelog.rst000066400000000000000000000000371441755530000200470ustar00rootroot00000000000000.. include:: ../../CHANGES.rst hmmlearn-0.3.0/doc/source/conf.py000066400000000000000000000025141441755530000166670ustar00rootroot00000000000000import hmmlearn needs_sphinx = '2.0' extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'sphinx.ext.intersphinx', 'sphinx.ext.napoleon', 'sphinx_gallery.gen_gallery', ] project = 'hmmlearn' copyright = '2010-present, hmmlearn developers (BSD License)' version = release = hmmlearn.__version__ default_role = 'py:obj' pygments_style = 'sphinx' language = 'en' # -- Options for extensions -------------------------------------------------- autodoc_default_options = { 'members': None, 'inherited-members': None, 'special-members': '__init__', } intersphinx_mapping = { 'sklearn': ('https://scikit-learn.org/stable/', None), } napoleon_use_ivar = True napoleon_use_rtype = False sphinx_gallery_conf = { 'examples_dirs': '../../examples', 'gallery_dirs': 'auto_examples' } # -- Options for HTML output ------------------------------------------------- html_theme = 'pydata_sphinx_theme' html_theme_options = { 'github_url': 'https://github.com/hmmlearn/hmmlearn', } html_css_files = ['hide_some_gallery_elements.css'] html_static_path = ['_static'] htmlhelp_basename = 'hmmlearn_doc' # -- Options for LaTeX output ------------------------------------------------ latex_documents = [('index', 'user_guide.tex', 'hmmlearn user guide', 'hmmlearn developers', 'manual'), ] hmmlearn-0.3.0/doc/source/index.rst000066400000000000000000000012131441755530000172240ustar00rootroot00000000000000hmmlearn ======== Unsupervised learning and inference of Hidden Markov Models: * Simple algorithms and models to learn HMMs (`Hidden Markov Models `_) in Python, * Follows `scikit-learn `_ API as close as possible, but adapted to sequence data, * Built on scikit-learn, NumPy, SciPy, and Matplotlib, * Open source, commercially usable --- `BSD license `_. .. _user_guide: User guide: table of contents ----------------------------- .. toctree:: :maxdepth: 2 tutorial auto_examples/index api changelog hmmlearn-0.3.0/doc/source/tutorial.rst000066400000000000000000000210561441755530000177670ustar00rootroot00000000000000.. _tutorial: Tutorial ======== .. currentmodule:: hmmlearn ``hmmlearn`` implements the Hidden Markov Models (HMMs). The HMM is a generative probabilistic model, in which a sequence of observable :math:`\mathbf{X}` variables is generated by a sequence of internal hidden states :math:`\mathbf{Z}`. The hidden states are not observed directly. The transitions between hidden states are assumed to have the form of a (first-order) Markov chain. 
They can be specified by the start probability vector :math:`\boldsymbol{\pi}` and a transition probability matrix :math:`\mathbf{A}`. The emission probability of an observable can be any distribution with parameters :math:`\boldsymbol{\theta}` conditioned on the current hidden state. The HMM is completely determined by :math:`\boldsymbol{\pi}`, :math:`\mathbf{A}` and :math:`\boldsymbol{\theta}`. There are three fundamental problems for HMMs: * Given the model parameters and observed data, estimate the optimal sequence of hidden states. * Given the model parameters and observed data, calculate the model likelihood. * Given just the observed data, estimate the model parameters. The first and the second problem can be solved by the dynamic programming algorithms known as the Viterbi algorithm and the Forward-Backward algorithm, respectively. The last one can be solved by an iterative Expectation-Maximization (EM) algorithm, known as the Baum-Welch algorithm. .. topic:: References: - Lawrence R. Rabiner "A tutorial on hidden Markov models and selected applications in speech recognition", Proceedings of the IEEE 77.2, pp. 257-286, 1989. - Jeff A. Bilmes, "A gentle tutorial of the EM algorithm and its application to parameter estimation for Gaussian mixture and hidden Markov models.", 1998. - Mark Stamp. "A revealing introduction to hidden Markov models". Tech. rep. Department of Computer Science, San Jose State University, 2018. url: http://www.cs.sjsu.edu/~stamp/RUA/HMM.pdf. Available models ---------------- .. autosummary:: :nosignatures: hmm.CategoricalHMM hmm.GaussianHMM hmm.GMMHMM hmm.MultinomialHMM hmm.PoissonHMM vhmm.VariationalCategoricalHMM vhmm.VariationalGaussianHMM :ref:`Read on ` for details on how to implement a HMM with a custom emission probability. Building HMM and generating samples ----------------------------------- You can build a HMM instance by passing the parameters described above to the constructor. Then, you can generate samples from the HMM by calling :meth:`~.BaseHMM.sample`. >>> import numpy as np >>> from hmmlearn import hmm >>> np.random.seed(42) >>> >>> model = hmm.GaussianHMM(n_components=3, covariance_type="full") >>> model.startprob_ = np.array([0.6, 0.3, 0.1]) >>> model.transmat_ = np.array([[0.7, 0.2, 0.1], ... [0.3, 0.5, 0.2], ... [0.3, 0.3, 0.4]]) >>> model.means_ = np.array([[0.0, 0.0], [3.0, -3.0], [5.0, 10.0]]) >>> model.covars_ = np.tile(np.identity(2), (3, 1, 1)) >>> X, Z = model.sample(100) The transition probability matrix need not to be ergodic. For instance, a left-right HMM can be defined as follows: >>> lr = hmm.GaussianHMM(n_components=3, covariance_type="diag", ... init_params="cm", params="cmt") >>> lr.startprob_ = np.array([1.0, 0.0, 0.0]) >>> lr.transmat_ = np.array([[0.5, 0.5, 0.0], ... [0.0, 0.5, 0.5], ... [0.0, 0.0, 1.0]]) If any of the required parameters are missing, :meth:`~.BaseHMM.sample` will raise an exception: >>> model = hmm.GaussianHMM(n_components=3) >>> X, Z = model.sample(100) Traceback (most recent call last): ... sklearn.exceptions.NotFittedError: This GaussianHMM instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator. Fixing parameters ----------------- Each HMM parameter has a character code which can be used to customize its initialization and estimation. The EM algorithm needs a starting point to proceed, thus prior to training each parameter is assigned a value either random or computed from the data. 
It is possible to hook into this process and provide a starting point explicitly. To do so 1. ensure that the character code for the parameter is missing from :attr:`~.BaseHMM.init_params` and then 2. set the parameter to the desired value. For example, consider a HMM with an explicitly initialized transition probability matrix: >>> model = hmm.GaussianHMM(n_components=3, n_iter=100, init_params="mcs") >>> model.transmat_ = np.array([[0.7, 0.2, 0.1], ... [0.3, 0.5, 0.2], ... [0.3, 0.3, 0.4]]) A similar trick applies to parameter estimation. If you want to fix some parameter at a specific value, remove the corresponding character from :attr:`~.BaseHMM.params` and set the parameter value before training. .. topic:: Examples: * :doc:`/auto_examples/plot_hmm_sampling_and_decoding` Training HMM parameters and inferring the hidden states ------------------------------------------------------- You can train an HMM by calling the :meth:`~.BaseHMM.fit` method. The input is a matrix of concatenated sequences of observations (*aka* samples) along with the lengths of the sequences (see :ref:`Working with multiple sequences `). Note, since the EM algorithm is a gradient-based optimization method, it will generally get stuck in local optima. You should in general try to run ``fit`` with various initializations and select the highest scored model. The score of the model can be calculated by the :meth:`~.BaseHMM.score` method. The inferred optimal hidden states can be obtained by calling :meth:`~.BaseHMM.predict` method. The ``predict`` method can be specified with a decoder algorithm. Currently the Viterbi algorithm (``"viterbi"``), and maximum a posteriori estimation (``"map"``) are supported. This time, the input is a single sequence of observed values. Note, the states in ``remodel`` will have a different order than those in the generating model. >>> remodel = hmm.GaussianHMM(n_components=3, covariance_type="full", n_iter=100) >>> remodel.fit(X) GaussianHMM(... >>> Z2 = remodel.predict(X) .. topic:: Examples: * :doc:`/auto_examples/plot_casino` Monitoring convergence ---------------------- The number of EM algorithm iterations is upper bounded by the ``n_iter`` parameter. The training proceeds until ``n_iter`` steps were performed or the change in score is lower than the specified threshold ``tol``. Note, that depending on the data, the EM algorithm may or may not achieve convergence in the given number of steps. You can use the :attr:`~.BaseHMM.monitor_` attribute to diagnose convergence: >>> remodel.monitor_ ConvergenceMonitor( history=[...], iter=15, n_iter=100, tol=0.01, verbose=False, ) >>> remodel.monitor_.converged True .. _multiple_sequences: Working with multiple sequences ------------------------------- All of the examples so far were using a single sequence of observations. The input format in the case of multiple sequences is a bit involved and is best understood by example. Consider two 1D sequences: >>> X1 = [[0.5], [1.0], [-1.0], [0.42], [0.24]] >>> X2 = [[2.4], [4.2], [0.5], [-0.24]] To pass both sequences to :meth:`~.BaseHMM.fit` or :meth:`~.BaseHMM.predict`, first concatenate them into a single array and then compute an array of sequence lengths: >>> X = np.concatenate([X1, X2]) >>> lengths = [len(X1), len(X2)] Finally, just call the desired method with ``X`` and ``lengths``: >>> hmm.GaussianHMM(n_components=3).fit(X, lengths) GaussianHMM(... 
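``predict`` and ``score`` accept ``lengths`` in the same way; as a quick
sketch reusing the arrays defined above (the recovered state labels
themselves depend on the fit):

>>> model = hmm.GaussianHMM(n_components=3).fit(X, lengths)
>>> states = model.predict(X, lengths)
>>> states.shape
(9,)
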
Saving and loading HMM ---------------------- After training, a HMM can be easily persisted for future use with the standard :mod:`pickle` module: >>> import pickle >>> with open("filename.pkl", "wb") as file: pickle.dump(remodel, file) >>> with open("filename.pkl", "rb") as file: pickle.load(file) GaussianHMM(... .. _customizing: Implementing HMMs with custom emission probabilities ---------------------------------------------------- If you want to implement a custom emission probability (e.g. Cauchy), you have to subclass :class:`~.BaseHMM` and override the following methods .. currentmodule:: hmmlearn.base .. autosummary:: BaseHMM._init BaseHMM._check BaseHMM._generate_sample_from_state BaseHMM._compute_log_likelihood BaseHMM._compute_likelihood BaseHMM._initialize_sufficient_statistics BaseHMM._accumulate_sufficient_statistics BaseHMM._do_mstep Optionally, only one of `~.BaseHMM._compute_likelihood` and `~.BaseHMM._compute_log_likelihood` need to be overridden, and the base implementation will provide the other. hmmlearn-0.3.0/examples/000077500000000000000000000000001441755530000151375ustar00rootroot00000000000000hmmlearn-0.3.0/examples/README.txt000066400000000000000000000000411441755530000166300ustar00rootroot00000000000000.. _examples: Examples ======== hmmlearn-0.3.0/examples/plot_casino.py000066400000000000000000000121121441755530000200200ustar00rootroot00000000000000""" Dishonest Casino Example ------------------------ We'll use the ubiquitous dishonest casino example to demonstrate how to train a Hidden Markov Model (HMM) on somewhat realistic test data (e.g. http://www.mcb111.org/w06/durbin_book.pdf Chapter 3). In this example, we suspect that a casino is trading out a fair die (singular or dice) for a loaded die. We want to figure out 1) when the loaded die was used (i.e. the most likely path) 2) how often the loaded die is used (i.e. the transition probabilities) and 3) the probabilities for each outcome of a roll for the loaded die (i.e. the emission probabilities). """ # %% # First, import necessary modules and functions. import numpy as np import matplotlib.pyplot as plt from hmmlearn import hmm # %% # Now, let's act as the casino and exchange a fair die for a loaded one # and generate a series of rolls that someone at the casino would # observe. 
# make our generative model with two components, a fair die and a # loaded die gen_model = hmm.CategoricalHMM(n_components=2, random_state=99) # the first state is the fair die so let's start there so no one # catches on right away gen_model.startprob_ = np.array([1.0, 0.0]) # now let's say that we sneak the loaded die in: # here, we have a 95% chance to continue using the fair die and a 5% # chance to switch to the loaded die # when we enter the loaded die state, we have a 90% chance of staying # in that state and a 10% chance of leaving gen_model.transmat_ = np.array([[0.95, 0.05], [0.1, 0.9]]) # now let's set the emission means: # the first state is a fair die with equal probabilities and the # second is loaded by being biased toward rolling a six gen_model.emissionprob_ = \ np.array([[1 / 6, 1 / 6, 1 / 6, 1 / 6, 1 / 6, 1 / 6], [1 / 10, 1 / 10, 1 / 10, 1 / 10, 1 / 10, 1 / 2]]) # simulate the loaded dice rolls rolls, gen_states = gen_model.sample(30000) # plot states over time, let's just look at the first rolls for clarity fig, ax = plt.subplots() ax.plot(gen_states[:500]) ax.set_title('States over time') ax.set_xlabel('Time (# of rolls)') ax.set_ylabel('State') fig.show() # plot rolls for the fair and loaded states fig, ax = plt.subplots() ax.hist(rolls[gen_states == 0], label='fair', alpha=0.5, bins=np.arange(7) - 0.5, density=True) ax.hist(rolls[gen_states == 1], label='loaded', alpha=0.5, bins=np.arange(7) - 0.5, density=True) ax.set_title('Roll probabilities by state') ax.set_xlabel('Count') ax.set_ylabel('Roll') ax.legend() fig.show() # %% # Now, let's see if we can recover our hidden states, transmission matrix # and emission probabilities. # split our data into training and validation sets (50/50 split) X_train = rolls[:rolls.shape[0] // 2] X_validate = rolls[rolls.shape[0] // 2:] # check optimal score gen_score = gen_model.score(X_validate) best_score = best_model = None n_fits = 50 np.random.seed(13) for idx in range(n_fits): model = hmm.CategoricalHMM( n_components=2, random_state=idx, init_params='se') # don't init transition, set it below # we need to initialize with random transition matrix probabilities # because the default is an even likelihood transition # we know transitions are rare (otherwise the casino would get caught!) # so let's have an Dirichlet random prior with an alpha value of # (0.1, 0.9) to enforce our assumption transitions happen roughly 10% # of the time model.transmat_ = np.array([np.random.dirichlet([0.9, 0.1]), np.random.dirichlet([0.1, 0.9])]) model.fit(X_train) score = model.score(X_validate) print(f'Model #{idx}\tScore: {score}') if best_score is None or score > best_score: best_model = model best_score = score print(f'Generated score: {gen_score}\nBest score: {best_score}') # use the Viterbi algorithm to predict the most likely sequence of states # given the model states = best_model.predict(rolls) # plot our recovered states compared to generated (aim 1) fig, ax = plt.subplots() ax.plot(gen_states[:500], label='generated') ax.plot(states[:500] + 1.5, label='recovered') ax.set_yticks([]) ax.set_title('States compared to generated') ax.set_xlabel('Time (# rolls)') ax.set_xlabel('State') ax.legend() fig.show() # %% # Let's check our learned transition probabilities and see if they match. print(f'Transmission Matrix Generated:\n{gen_model.transmat_.round(3)}\n\n' f'Transmission Matrix Recovered:\n{best_model.transmat_.round(3)}\n\n') # %% # Finally, let's see if we can tell how the die is loaded. 
print(f'Emission Matrix Generated:\n{gen_model.emissionprob_.round(3)}\n\n' f'Emission Matrix Recovered:\n{best_model.emissionprob_.round(3)}\n\n') # %% # In this case, we were able to get very good estimates of the transition and # emission matrices, but decoding the states was imperfect. That's because # the decoding algorithm is greedy and picks the most likely series of states # which isn't necessarily what happens in real life. Even so, our model could # tell us when to watch for the loaded die and we'd have a better chance at # catching them red-handed. hmmlearn-0.3.0/examples/plot_gaussian_model_selection.py000066400000000000000000000051401441755530000236060ustar00rootroot00000000000000""" Using AIC and BIC for Model Selection ------------------------------------- This example will demonstrate how the Akaike Information Criterion (AIC) and Bayesian Information Criterion (BIC) values may be used to select the number of components for a model. 1) We train models with varying numbers of ``n_components``. 2) For each ``n_components`` we train multiple models with different random initializations; the best model is kept. 3) Now we plot the values of the AIC and BIC for each n_components. A clear minimum is detected for the model with ``n_components=4``. We also see that using the log-likelihood of the training data is not suitable for model selection, as it is always increasing. """ import numpy as np import matplotlib.pyplot as plt from sklearn.utils import check_random_state from hmmlearn.hmm import GaussianHMM rs = check_random_state(546) # %% # Our model to generate sample data from: model = GaussianHMM(4, init_params="") model.n_features = 4 model.startprob_ = np.array([1/4., 1/4., 1/4., 1/4.]) model.transmat_ = np.array([[0.3, 0.4, 0.2, 0.1], [0.1, 0.2, 0.3, 0.4], [0.5, 0.2, 0.1, 0.2], [0.25, 0.25, 0.25, 0.25]]) model.means_ = np.array([[-2.5], [0], [2.5], [5.]]) model.covars_ = np.sqrt([[0.25], [0.25], [0.25], [0.25]]) X, _ = model.sample(1000, random_state=rs) lengths = [X.shape[0]] # %% # Search over various n_components and examine the # AIC, BIC, and the LL of the data. Train a few different # models with different random initializations, saving the one # with the best LL. aic = [] bic = [] lls = [] ns = [2, 3, 4, 5, 6] for n in ns: best_ll = None best_model = None for i in range(10): h = GaussianHMM(n, n_iter=200, tol=1e-4, random_state=rs) h.fit(X) score = h.score(X) if not best_ll or best_ll < best_ll: best_ll = score best_model = h aic.append(best_model.aic(X)) bic.append(best_model.bic(X)) lls.append(best_model.score(X)) # %% # Visualize our results: a clear minimum is seen for 4 components # which matches our expectation. fig, ax = plt.subplots() ln1 = ax.plot(ns, aic, label="AIC", color="blue", marker="o") ln2 = ax.plot(ns, bic, label="BIC", color="green", marker="o") ax2 = ax.twinx() ln3 = ax2.plot(ns, lls, label="LL", color="orange", marker="o") ax.legend(handles=ax.lines + ax2.lines) ax.set_title("Using AIC/BIC for Model Selection") ax.set_ylabel("Criterion Value (lower is better)") ax2.set_ylabel("LL (higher is better)") ax.set_xlabel("Number of HMM Components") fig.tight_layout() plt.show() hmmlearn-0.3.0/examples/plot_hmm_sampling_and_decoding.py000066400000000000000000000074631441755530000237120ustar00rootroot00000000000000""" Sampling from and decoding an HMM --------------------------------- This script shows how to sample points from a Hidden Markov Model (HMM): we use a 4-state model with specified mean and covariance. 
The plot shows the sequence of observations generated with the transitions between them. We can see that, as specified by our transition matrix, there are no transition between component 1 and 3. Then, we decode our model to recover the input parameters. """ import numpy as np import matplotlib.pyplot as plt from hmmlearn import hmm # Prepare parameters for a 4-components HMM # Initial population probability startprob = np.array([0.6, 0.3, 0.1, 0.0]) # The transition matrix, note that there are no transitions possible # between component 1 and 3 transmat = np.array([[0.7, 0.2, 0.0, 0.1], [0.3, 0.5, 0.2, 0.0], [0.0, 0.3, 0.5, 0.2], [0.2, 0.0, 0.2, 0.6]]) # The means of each component means = np.array([[0.0, 0.0], [0.0, 11.0], [9.0, 10.0], [11.0, -1.0]]) # The covariance of each component covars = .5 * np.tile(np.identity(2), (4, 1, 1)) # Build an HMM instance and set parameters gen_model = hmm.GaussianHMM(n_components=4, covariance_type="full") # Instead of fitting it from the data, we directly set the estimated # parameters, the means and covariance of the components gen_model.startprob_ = startprob gen_model.transmat_ = transmat gen_model.means_ = means gen_model.covars_ = covars # Generate samples X, Z = gen_model.sample(500) # Plot the sampled data fig, ax = plt.subplots() ax.plot(X[:, 0], X[:, 1], ".-", label="observations", ms=6, mfc="orange", alpha=0.7) # Indicate the component numbers for i, m in enumerate(means): ax.text(m[0], m[1], 'Component %i' % (i + 1), size=17, horizontalalignment='center', bbox=dict(alpha=.7, facecolor='w')) ax.legend(loc='best') fig.show() # %% # Now, let's ensure we can recover our parameters. scores = list() models = list() for n_components in (3, 4, 5): for idx in range(10): # define our hidden Markov model model = hmm.GaussianHMM(n_components=n_components, covariance_type='full', random_state=idx) model.fit(X[:X.shape[0] // 2]) # 50/50 train/validate models.append(model) scores.append(model.score(X[X.shape[0] // 2:])) print(f'Converged: {model.monitor_.converged}' f'\tScore: {scores[-1]}') # get the best model model = models[np.argmax(scores)] n_states = model.n_components print(f'The best model had a score of {max(scores)} and {n_states} ' 'states') # use the Viterbi algorithm to predict the most likely sequence of states # given the model states = model.predict(X) # %% # Let's plot our states compared to those generated and our transition matrix # to get a sense of our model. We can see that the recovered states follow # the same path as the generated states, just with the identities of the # states transposed (i.e. instead of following a square as in the first # figure, the nodes are switch around but this does not change the basic # pattern). The same is true for the transition matrix. 
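# As a quick, minimal sketch (using only quantities already computed above),
# one way to make the relabelling described above explicit is to match each
# recovered state to the generated component with the closest mean:
state_mapping = np.linalg.norm(
    model.means_[:, None, :] - gen_model.means_[None, :, :],
    axis=-1).argmin(axis=1)
print('Recovered state -> generated component:', state_mapping)
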
# plot model states over time fig, ax = plt.subplots() ax.plot(Z, states) ax.set_title('States compared to generated') ax.set_xlabel('Generated State') ax.set_ylabel('Recovered State') fig.show() # plot the transition matrix fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 5)) ax1.imshow(gen_model.transmat_, aspect='auto', cmap='spring') ax1.set_title('Generated Transition Matrix') ax2.imshow(model.transmat_, aspect='auto', cmap='spring') ax2.set_title('Recovered Transition Matrix') for ax in (ax1, ax2): ax.set_xlabel('State To') ax.set_ylabel('State From') fig.tight_layout() fig.show() hmmlearn-0.3.0/examples/plot_multinomial_hmm.py000066400000000000000000000104241441755530000217430ustar00rootroot00000000000000""" A simple example demonstrating Multinomial HMM ---------------------------------------------- The Multinomial HMM is a generalization of the Categorical HMM, with some key differences: - a Categorical__ (or generalized Bernoulli/multinoulli) distribution models an outcome of a die with `n_features` possible values, i.e. it is a generalization of the Bernoulli distribution where there are ``n_features`` categories instead of the binary success/failure outcome; a Categorical HMM has the emission probabilities for each component parametrized by Categorical distributions. - a Multinomial__ distribution models the outcome of ``n_trials`` independent rolls of die, each with ``n_features`` possible values; i.e. - when ``n_trials = 1`` and ``n_features = 2``, it is a Bernoulli distribution, - when ``n_trials > 1`` and ``n_features = 2``, it is a Binomial distribution, - when ``n_trials = 1`` and ``n_features > 2``, it is a Categorical distribution. The emission probabilities for each component of a Multinomial HMM are parameterized by Multinomial distributions. __ https://en.wikipedia.org/wiki/Categorical_distribution __ https://en.wikipedia.org/wiki/Multinomial_distribution """ import numpy as np from hmmlearn import hmm # For this example, we will model the stages of a conversation, # where each sentence is "generated" with an underlying topic, "cat" or "dog" states = ["cat", "dog"] id2topic = dict(zip(range(len(states)), states)) # we are more likely to talk about cats first start_probs = np.array([0.6, 0.4]) # For each topic, the probability of saying certain words can be modeled by # a distribution over vocabulary associated with the categories vocabulary = ["tail", "fetch", "mouse", "food"] # if the topic is "cat", we are more likely to talk about "mouse" # if the topic is "dog", we are more likely to talk about "fetch" emission_probs = np.array([[0.25, 0.1, 0.4, 0.25], [0.2, 0.5, 0.1, 0.2]]) # Also assume it's more likely to stay in a state than transition to the other trans_mat = np.array([[0.8, 0.2], [0.2, 0.8]]) # Pretend that every sentence we speak only has a total of 5 words, # i.e. 
# we independently utter a word from the vocabulary 5 times per sentence
# we observe the following bag of words (BoW) for 10 sentences:
observations = [["tail", "mouse", "mouse", "food", "mouse"],
                ["food", "mouse", "mouse", "food", "mouse"],
                ["tail", "mouse", "mouse", "tail", "mouse"],
                ["food", "mouse", "food", "food", "tail"],
                ["tail", "fetch", "mouse", "food", "tail"],
                ["tail", "fetch", "fetch", "food", "fetch"],
                ["fetch", "fetch", "fetch", "food", "tail"],
                ["food", "mouse", "food", "food", "tail"],
                ["tail", "mouse", "mouse", "tail", "mouse"],
                ["fetch", "fetch", "fetch", "fetch", "fetch"]]

# Convert "sentences" to numbers:
vocab2id = dict(zip(vocabulary, range(len(vocabulary))))


def sentence2counts(sentence):
    ans = []
    for word, idx in vocab2id.items():
        count = sentence.count(word)
        ans.append(count)
    return ans


X = []
for sentence in observations:
    row = sentence2counts(sentence)
    X.append(row)

data = np.array(X, dtype=int)

# pretend this is repeated, so we have more data to learn from:
lengths = [len(X)]*5
sequences = np.tile(data, (5, 1))

# Set up model:
model = hmm.MultinomialHMM(n_components=len(states),
                           n_trials=len(observations[0]),
                           n_iter=50,
                           init_params='')

model.n_features = len(vocabulary)
model.startprob_ = start_probs
model.transmat_ = trans_mat
model.emissionprob_ = emission_probs
model.fit(sequences, lengths)
logprob, received = model.decode(sequences)

print("Topics discussed:")
print([id2topic[x] for x in received])

print("Learned emission probs:")
print(model.emissionprob_)

print("Learned transition matrix:")
print(model.transmat_)

# Try to reset and refit:
new_model = hmm.MultinomialHMM(n_components=len(states),
                               n_trials=len(observations[0]),
                               n_iter=50, init_params='ste')

new_model.fit(sequences, lengths)
logprob, received = new_model.decode(sequences)

print("\nNew Model")
print("Topics discussed:")
print([id2topic[x] for x in received])

print("Learned emission probs:")
print(new_model.emissionprob_)

print("Learned transition matrix:")
print(new_model.transmat_)
hmmlearn-0.3.0/examples/plot_poisson_hmm.py000066400000000000000000000102311441755530000210770ustar00rootroot00000000000000"""
Using a Hidden Markov Model with Poisson Emissions to Understand Earthquakes
-----------------------------------------------------------------------------

Let's look at data of magnitude 7+ earthquakes between 1900-2006 in the world
collected by the US Geological Survey as described in this textbook: Zucchini
& MacDonald, "Hidden Markov Models for Time Series"
(https://ayorho.files.wordpress.com/2011/05/chapter1.pdf). The goal is to see
if we can separate out different tectonic processes that cause earthquakes
based on their frequency of occurrence. The idea is that each tectonic
boundary may cause earthquakes with a particular distribution of waiting
times depending on how active it is. This might help us predict future
earthquake danger, especially on a geological time scale.
""" import numpy as np import matplotlib.pyplot as plt from scipy.stats import poisson from hmmlearn import hmm # earthquake data from http://earthquake.usgs.gov/ earthquakes = np.array([ 13, 14, 8, 10, 16, 26, 32, 27, 18, 32, 36, 24, 22, 23, 22, 18, 25, 21, 21, 14, 8, 11, 14, 23, 18, 17, 19, 20, 22, 19, 13, 26, 13, 14, 22, 24, 21, 22, 26, 21, 23, 24, 27, 41, 31, 27, 35, 26, 28, 36, 39, 21, 17, 22, 17, 19, 15, 34, 10, 15, 22, 18, 15, 20, 15, 22, 19, 16, 30, 27, 29, 23, 20, 16, 21, 21, 25, 16, 18, 15, 18, 14, 10, 15, 8, 15, 6, 11, 8, 7, 18, 16, 13, 12, 13, 20, 15, 16, 12, 18, 15, 16, 13, 15, 16, 11, 11]) # Plot the sampled data fig, ax = plt.subplots() ax.plot(earthquakes, ".-", ms=6, mfc="orange", alpha=0.7) ax.set_xticks(range(0, earthquakes.size, 10)) ax.set_xticklabels(range(1906, 2007, 10)) ax.set_xlabel('Year') ax.set_ylabel('Count') fig.show() # %% # Now, fit a Poisson Hidden Markov Model to the data. scores = list() models = list() for n_components in range(1, 5): for idx in range(10): # ten different random starting states # define our hidden Markov model model = hmm.PoissonHMM(n_components=n_components, random_state=idx, n_iter=10) model.fit(earthquakes[:, None]) models.append(model) scores.append(model.score(earthquakes[:, None])) print(f'Converged: {model.monitor_.converged}\t\t' f'Score: {scores[-1]}') # get the best model model = models[np.argmax(scores)] print(f'The best model had a score of {max(scores)} and ' f'{model.n_components} components') # use the Viterbi algorithm to predict the most likely sequence of states # given the model states = model.predict(earthquakes[:, None]) # %% # Let's plot the waiting times from our most likely series of states of # earthquake activity with the earthquake data. As we can see, the # model with the maximum likelihood had different states which may reflect # times of varying earthquake danger. # plot model states over time fig, ax = plt.subplots() ax.plot(model.lambdas_[states], ".-", ms=6, mfc="orange") ax.plot(earthquakes) ax.set_title('States compared to generated') ax.set_xlabel('State') # %% # Fortunately, 2006 ended with a period of relative tectonic stability, and, # if we look at our transition matrix, we can see that the off-diagonal terms # are small, meaning that the state transitions are rare and it's unlikely that # there will be high earthquake danger in the near future. fig, ax = plt.subplots() ax.imshow(model.transmat_, aspect='auto', cmap='spring') ax.set_title('Transition Matrix') ax.set_xlabel('State To') ax.set_ylabel('State From') # %% # Finally, let's look at the distribution of earthquakes compared to our # waiting time parameter values. We can see that our model fits the # distribution fairly well, replicating results from the reference. 
# get probabilities for each state given the data, take the average # to find the proportion of time in that state prop_per_state = model.predict_proba(earthquakes[:, None]).mean(axis=0) # earthquake counts to plot bins = sorted(np.unique(earthquakes)) fig, ax = plt.subplots() ax.hist(earthquakes, bins=bins, density=True) ax.plot(bins, poisson.pmf(bins, model.lambdas_).T @ prop_per_state) ax.set_title('Histogram of Earthquakes with Fitted Poisson States') ax.set_xlabel('Number of Earthquakes') ax.set_ylabel('Proportion') plt.show() hmmlearn-0.3.0/examples/plot_variational_inference.py000066400000000000000000000146471441755530000231120ustar00rootroot00000000000000""" Learning an HMM using VI and EM over a set of Gaussian sequences ---------------------------------------------------------------- We train models with a variety of number of states (N) for each algorithm, and then examine which model is "best", by printing the log-likelihood or variational lower bound for each N. We will see that an HMM trained using VI will prefer the correct number of states, while an HMM learning with EM will prefer as many states as possible. Note, for models trained with EM, some other criteria such as AIC/BIC, or held out test data, could be used to select the correct number of hidden states. """ import collections import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gs import scipy.stats from sklearn.utils import check_random_state from hmmlearn import hmm, vhmm import matplotlib def gaussian_hinton_diagram(startprob, transmat, means, variances, vmin=0, vmax=1, infer_hidden=True): """ Show the initial state probabilities, the transition probabilities as heatmaps, and draw the emission distributions. """ num_states = transmat.shape[0] f = plt.figure(figsize=(3*(num_states), 2*num_states)) grid = gs.GridSpec(3, 3) ax = f.add_subplot(grid[0, 0]) ax.imshow(startprob[None, :], vmin=vmin, vmax=vmax) ax.set_title("Initial Probabilities", size=14) ax = f.add_subplot(grid[1:, 0]) ax.imshow(transmat, vmin=vmin, vmax=vmax) ax.set_title("Transition Probabilities", size=14) ax = f.add_subplot(grid[1:, 1:]) for i in range(num_states): keep = True if infer_hidden: if np.all(np.abs(transmat[i] - transmat[i][0]) < 1e-4): keep = False if keep: s_min = means[i] - 10 * variances[i] s_max = means[i] + 10 * variances[i] xx = np.arange(s_min, s_max, (s_max - s_min) / 1000) norm = scipy.stats.norm(means[i], np.sqrt(variances[i])) yy = norm.pdf(xx) keep = yy > .01 ax.plot(xx[keep], yy[keep], label="State: {}".format(i)) ax.set_title("Emissions Probabilities", size=14) ax.legend(loc="best") f.tight_layout() return f np.set_printoptions(formatter={'float_kind': "{:.3f}".format}) rs = check_random_state(2022) sample_length = 500 num_samples = 1 # With random initialization, it takes a few tries to find the # best solution num_inits = 5 num_states = np.arange(1, 7) verbose = False # Prepare parameters for a 4-components HMM # And Sample several sequences from this model model = hmm.GaussianHMM(4, init_params="") model.n_features = 4 # Initial population probability model.startprob_ = np.array([0.25, 0.25, 0.25, 0.25]) # The transition matrix, note that there are no transitions possible # between component 1 and 3 model.transmat_ = np.array([[0.2, 0.2, 0.3, 0.3], [0.3, 0.2, 0.2, 0.3], [0.2, 0.3, 0.3, 0.2], [0.3, 0.3, 0.2, 0.2]]) # The means and covariance of each component model.means_ = np.array([[-1.5], [0], [1.5], [3.]]) model.covars_ = np.array([[0.25], [0.25], [0.25], [0.25]])**2 # Generate training 
# data
sequences = []
lengths = []

for i in range(num_samples):
    sequences.extend(model.sample(sample_length, random_state=rs)[0])
    lengths.append(sample_length)

sequences = np.asarray(sequences)

# Train a suite of models, and keep track of the best model for each
# number of states, and algorithm
best_scores = collections.defaultdict(dict)
best_models = collections.defaultdict(dict)
for n in num_states:
    for i in range(num_inits):
        vi = vhmm.VariationalGaussianHMM(n,
                                         n_iter=1000,
                                         covariance_type="full",
                                         implementation="scaling",
                                         tol=1e-6,
                                         random_state=rs,
                                         verbose=verbose)
        vi.fit(sequences, lengths)
        lb = vi.monitor_.history[-1]
        print(f"Training VI({n}) Variational Lower Bound={lb} "
              f"Iterations={len(vi.monitor_.history)} ")
        if best_models["VI"].get(n) is None or best_scores["VI"][n] < lb:
            best_models["VI"][n] = vi
            best_scores["VI"][n] = lb

        em = hmm.GaussianHMM(n,
                             n_iter=1000,
                             covariance_type="full",
                             implementation="scaling",
                             tol=1e-6,
                             random_state=rs,
                             verbose=verbose)
        em.fit(sequences, lengths)
        ll = em.monitor_.history[-1]
        print(f"Training EM({n}) Final Log Likelihood={ll} "
              f"Iterations={len(em.monitor_.history)} ")
        if best_models["EM"].get(n) is None or best_scores["EM"][n] < ll:
            best_models["EM"][n] = em
            best_scores["EM"][n] = ll

# Display the model likelihood/variational lower bound for each N
# and show the best learned model
for algo, scores in best_scores.items():
    best = max(scores.values())
    best_n, best_score = max(scores.items(), key=lambda x: x[1])
    for n, score in scores.items():
        flag = "* <- Best Model" if score == best_score else ""
        print(f"{algo}({n}): {score:.4f}{flag}")
    print(f"Best Model {algo}")
    best_model = best_models[algo][best_n]
    print(best_model.transmat_)
    print(best_model.means_)
    print(best_model.covars_)

# Also inspect the VI model with 6 states, to see how it has sparse structure
vi_model = best_models["VI"][6]
em_model = best_models["EM"][6]
print("VI solution for 6 states: Notice sparsity among states 1 and 4")
print(vi_model.transmat_)
print(vi_model.means_)
print(vi_model.covars_)
print("EM solution for 6 states")
print(em_model.transmat_)
print(em_model.means_)
print(em_model.covars_)

f = gaussian_hinton_diagram(
    vi_model.startprob_,
    vi_model.transmat_,
    vi_model.means_.ravel(),
    vi_model.covars_.ravel(),
)
f.suptitle("Variational Inference Solution", size=16)
f = gaussian_hinton_diagram(
    em_model.startprob_,
    em_model.transmat_,
    em_model.means_.ravel(),
    em_model.covars_.ravel(),
)
f.suptitle("Expectation-Maximization Solution", size=16)
plt.show()
hmmlearn-0.3.0/lib/000077500000000000000000000000001441755530000140675ustar00rootroot00000000000000hmmlearn-0.3.0/lib/hmmlearn/000077500000000000000000000000001441755530000156725ustar00rootroot00000000000000hmmlearn-0.3.0/lib/hmmlearn/.gitignore000066400000000000000000000000201441755530000176520ustar00rootroot00000000000000*.c
_version.py
hmmlearn-0.3.0/lib/hmmlearn/__init__.py000066400000000000000000000007111441755530000200020ustar00rootroot00000000000000"""
hmmlearn
========

``hmmlearn`` is a set of algorithms for learning and inference of Hidden
Markov Models.
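
A minimal usage sketch (illustrative only; see the ``hmmlearn.hmm`` module
for the available estimators and their parameters)::

    import numpy as np
    from hmmlearn import hmm

    X = np.random.default_rng(0).normal(size=(100, 1))  # toy observations
    model = hmm.GaussianHMM(n_components=2).fit(X)
    hidden_states = model.predict(X)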
""" try: import setuptools_scm __version__ = setuptools_scm.get_version( # xref setup.py root="../..", relative_to=__file__, version_scheme="post-release", local_scheme="node-and-date") except (ImportError, LookupError): try: from ._version import version as __version__ except ImportError: pass hmmlearn-0.3.0/lib/hmmlearn/_emissions.py000066400000000000000000000371251441755530000204240ustar00rootroot00000000000000import functools import inspect import warnings import numpy as np from scipy import special from scipy.stats import multinomial, poisson from sklearn.utils import check_random_state from .base import BaseHMM, _AbstractHMM from .stats import log_multivariate_normal_density from .utils import fill_covars, log_normalize _CATEGORICALHMM_DOC_SUFFIX = """ Notes ----- Unlike other HMM classes, `CategoricalHMM` ``X`` arrays have shape ``(n_samples, 1)`` (instead of ``(n_samples, n_features)``). Consider using `sklearn.preprocessing.LabelEncoder` to transform your input to the right format. """ def _make_wrapper(func): return functools.wraps(func)(lambda *args, **kwargs: func(*args, **kwargs)) class BaseCategoricalHMM(_AbstractHMM): def __init_subclass__(cls): for name in [ "decode", "fit", "predict", "predict_proba", "sample", "score", "score_samples", ]: meth = getattr(cls, name) doc = inspect.getdoc(meth) if doc is None or _CATEGORICALHMM_DOC_SUFFIX in doc: wrapper = meth else: wrapper = _make_wrapper(meth) wrapper.__doc__ = ( doc.replace("(n_samples, n_features)", "(n_samples, 1)") + _CATEGORICALHMM_DOC_SUFFIX) setattr(cls, name, wrapper) def _check_and_set_n_features(self, X): """ Check if ``X`` is a sample from a categorical distribution, i.e. an array of non-negative integers. """ if not np.issubdtype(X.dtype, np.integer): raise ValueError("Symbols should be integers") if X.min() < 0: raise ValueError("Symbols should be nonnegative") if self.n_features is not None: if self.n_features - 1 < X.max(): raise ValueError( f"Largest symbol is {X.max()} but the model only emits " f"symbols up to {self.n_features - 1}") else: self.n_features = X.max() + 1 def _get_n_fit_scalars_per_param(self): nc = self.n_components nf = self.n_features return { "s": nc - 1, "t": nc * (nc - 1), "e": nc * (nf - 1), } def _compute_likelihood(self, X): if X.shape[1] != 1: warnings.warn("Inputs of shape other than (n_samples, 1) are " "deprecated.", DeprecationWarning) X = np.concatenate(X)[:, None] return self.emissionprob_[:, X.squeeze(1)].T def _initialize_sufficient_statistics(self): stats = super()._initialize_sufficient_statistics() stats['obs'] = np.zeros((self.n_components, self.n_features)) return stats def _accumulate_sufficient_statistics( self, stats, X, lattice, posteriors, fwdlattice, bwdlattice): super()._accumulate_sufficient_statistics(stats=stats, X=X, lattice=lattice, posteriors=posteriors, fwdlattice=fwdlattice, bwdlattice=bwdlattice) if 'e' in self.params: if X.shape[1] != 1: warnings.warn("Inputs of shape other than (n_samples, 1) are " "deprecated.", DeprecationWarning) X = np.concatenate(X)[:, None] np.add.at(stats['obs'].T, X.squeeze(1), posteriors) def _generate_sample_from_state(self, state, random_state=None): cdf = np.cumsum(self.emissionprob_[state, :]) random_state = check_random_state(random_state) return [(cdf > random_state.rand()).argmax()] class BaseGaussianHMM(_AbstractHMM): def _get_n_fit_scalars_per_param(self): nc = self.n_components nf = self.n_features return { "s": nc - 1, "t": nc * (nc - 1), "m": nc * nf, "c": { "spherical": nc, "diag": nc * nf, "full": nc * nf * (nf 
+ 1) // 2, "tied": nf * (nf + 1) // 2, }[self.covariance_type], } def _compute_log_likelihood(self, X): return log_multivariate_normal_density( X, self.means_, self._covars_, self.covariance_type) def _initialize_sufficient_statistics(self): stats = super()._initialize_sufficient_statistics() stats['post'] = np.zeros(self.n_components) stats['obs'] = np.zeros((self.n_components, self.n_features)) stats['obs**2'] = np.zeros((self.n_components, self.n_features)) if self.covariance_type in ('tied', 'full'): stats['obs*obs.T'] = np.zeros((self.n_components, self.n_features, self.n_features)) return stats def _accumulate_sufficient_statistics( self, stats, X, lattice, posteriors, fwdlattice, bwdlattice): super()._accumulate_sufficient_statistics(stats=stats, X=X, lattice=lattice, posteriors=posteriors, fwdlattice=fwdlattice, bwdlattice=bwdlattice) if self._needs_sufficient_statistics_for_mean(): stats['post'] += posteriors.sum(axis=0) stats['obs'] += posteriors.T @ X if self._needs_sufficient_statistics_for_covars(): if self.covariance_type in ('spherical', 'diag'): stats['obs**2'] += posteriors.T @ X**2 elif self.covariance_type in ('tied', 'full'): # posteriors: (nt, nc); obs: (nt, nf); obs: (nt, nf) # -> (nc, nf, nf) stats['obs*obs.T'] += np.einsum( 'ij,ik,il->jkl', posteriors, X, X) def _needs_sufficient_statistics_for_mean(self): """ Whether the sufficient statistics needed to update the means are updated during calls to `fit`. """ raise NotImplementedError("Must be overriden in subclass") def _needs_sufficient_statistics_for_covars(self): """ Whhether the sufficient statistics needed to update the covariances are updated during calls to `fit`. """ raise NotImplementedError("Must be overriden in subclass") def _generate_sample_from_state(self, state, random_state): return random_state.multivariate_normal( self.means_[state], self.covars_[state] ) class BaseGMMHMM(BaseHMM): def _get_n_fit_scalars_per_param(self): nc = self.n_components nf = self.n_features nm = self.n_mix return { "s": nc - 1, "t": nc * (nc - 1), "m": nc * nm * nf, "c": { "spherical": nc * nm, "diag": nc * nm * nf, "full": nc * nm * nf * (nf + 1) // 2, "tied": nc * nf * (nf + 1) // 2, }[self.covariance_type], "w": nm - 1, } def _compute_log_weighted_gaussian_densities(self, X, i_comp): cur_means = self.means_[i_comp] cur_covs = self.covars_[i_comp] if self.covariance_type == 'spherical': cur_covs = cur_covs[:, None] log_cur_weights = np.log(self.weights_[i_comp]) return log_multivariate_normal_density( X, cur_means, cur_covs, self.covariance_type ) + log_cur_weights def _compute_log_likelihood(self, X): logprobs = np.empty((len(X), self.n_components)) for i in range(self.n_components): log_denses = self._compute_log_weighted_gaussian_densities(X, i) with np.errstate(under="ignore"): logprobs[:, i] = special.logsumexp(log_denses, axis=1) return logprobs def _initialize_sufficient_statistics(self): stats = super()._initialize_sufficient_statistics() stats['post_mix_sum'] = np.zeros((self.n_components, self.n_mix)) stats['post_sum'] = np.zeros(self.n_components) if 'm' in self.params: lambdas, mus = self.means_weight, self.means_prior stats['m_n'] = lambdas[:, :, None] * mus if 'c' in self.params: stats['c_n'] = np.zeros_like(self.covars_) # These statistics are stored in arrays and updated in-place. # We accumulate chunks of data for multiple sequences (aka # multiple frames) during fitting. 
The fit(X, lengths) method # in the BaseHMM class will call # _accumulate_sufficient_statistics once per sequence in the # training samples. Data from all sequences needs to be # accumulated and fed into _do_mstep. return stats def _accumulate_sufficient_statistics(self, stats, X, lattice, post_comp, fwdlattice, bwdlattice): super()._accumulate_sufficient_statistics( stats, X, lattice, post_comp, fwdlattice, bwdlattice ) n_samples, _ = X.shape # Statistics shapes: # post_comp_mix (n_samples, n_components, n_mix) # samples (n_samples, n_features) # centered (n_samples, n_components, n_mix, n_features) post_mix = np.zeros((n_samples, self.n_components, self.n_mix)) for p in range(self.n_components): log_denses = self._compute_log_weighted_gaussian_densities(X, p) log_normalize(log_denses, axis=-1) with np.errstate(under="ignore"): post_mix[:, p, :] = np.exp(log_denses) with np.errstate(under="ignore"): post_comp_mix = post_comp[:, :, None] * post_mix stats['post_mix_sum'] += post_comp_mix.sum(axis=0) stats['post_sum'] += post_comp.sum(axis=0) if 'm' in self.params: # means stats stats['m_n'] += np.einsum('ijk,il->jkl', post_comp_mix, X) if 'c' in self.params: # covariance stats centered = X[:, None, None, :] - self.means_ def outer_f(x): # Outer product over features. return x[..., :, None] * x[..., None, :] if self.covariance_type == 'full': centered_dots = outer_f(centered) c_n = np.einsum('ijk,ijklm->jklm', post_comp_mix, centered_dots) elif self.covariance_type == 'diag': centered2 = np.square(centered, out=centered) # reuse c_n = np.einsum('ijk,ijkl->jkl', post_comp_mix, centered2) elif self.covariance_type == 'spherical': # Faster than (x**2).sum(-1). centered_norm2 = np.einsum('...i,...i', centered, centered) c_n = np.einsum('ijk,ijk->jk', post_comp_mix, centered_norm2) elif self.covariance_type == 'tied': centered_dots = outer_f(centered) c_n = np.einsum('ijk,ijklm->jlm', post_comp_mix, centered_dots) stats['c_n'] += c_n def _generate_sample_from_state(self, state, random_state): cur_weights = self.weights_[state] i_gauss = random_state.choice(self.n_mix, p=cur_weights) if self.covariance_type == 'tied': # self.covars_.shape == (n_components, n_features, n_features) # shouldn't that be (n_mix, ...)? covs = self.covars_ else: covs = self.covars_[:, i_gauss] covs = fill_covars(covs, self.covariance_type, self.n_components, self.n_features) return random_state.multivariate_normal( self.means_[state, i_gauss], covs[state] ) class BaseMultinomialHMM(BaseHMM): def _check_and_set_n_features(self, X): # Also sets n_trials. """ Check if ``X`` is a sample from a multinomial distribution, i.e. an array of non-negative integers, summing up to n_trials. 
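        For example, with ``n_trials = 5`` and ``n_features = 4``, a valid
        row of ``X`` would be ``[2, 0, 1, 2]``, since its counts sum to 5.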
""" super()._check_and_set_n_features(X) if not np.issubdtype(X.dtype, np.integer) or X.min() < 0: raise ValueError("Symbol counts should be nonnegative integers") if self.n_trials is None: self.n_trials = X.sum(axis=1) elif not (X.sum(axis=1) == self.n_trials).all(): raise ValueError("Total count for each sample should add up to " "the number of trials") def _get_n_fit_scalars_per_param(self): nc = self.n_components nf = self.n_features return { "s": nc - 1, "t": nc * (nc - 1), "e": nc * (nf - 1), } def _compute_likelihood(self, X): probs = np.empty((len(X), self.n_components)) n_trials = X.sum(axis=1) for c in range(self.n_components): probs[:, c] = multinomial.pmf( X, n=n_trials, p=self.emissionprob_[c, :]) return probs def _compute_log_likelihood(self, X): logprobs = np.empty((len(X), self.n_components)) n_trials = X.sum(axis=1) for c in range(self.n_components): logprobs[:, c] = multinomial.logpmf( X, n=n_trials, p=self.emissionprob_[c, :]) return logprobs def _initialize_sufficient_statistics(self): stats = super()._initialize_sufficient_statistics() stats['obs'] = np.zeros((self.n_components, self.n_features)) return stats def _accumulate_sufficient_statistics(self, stats, X, framelogprob, posteriors, fwdlattice, bwdlattice): super()._accumulate_sufficient_statistics( stats, X, framelogprob, posteriors, fwdlattice, bwdlattice) if 'e' in self.params: stats['obs'] += posteriors.T @ X def _generate_sample_from_state(self, state, random_state): try: n_trials, = np.unique(self.n_trials) except ValueError: raise ValueError("For sampling, a single n_trials must be given") return multinomial.rvs(n=n_trials, p=self.emissionprob_[state, :], random_state=random_state) class BasePoissonHMM(BaseHMM): def _get_n_fit_scalars_per_param(self): nc = self.n_components nf = self.n_features return { "s": nc - 1, "t": nc * (nc - 1), "l": nc * nf, } def _compute_likelihood(self, X): probs = np.empty((len(X), self.n_components)) for c in range(self.n_components): probs[:, c] = poisson.pmf(X, self.lambdas_[c]).prod(axis=1) return probs def _compute_log_likelihood(self, X): logprobs = np.empty((len(X), self.n_components)) for c in range(self.n_components): logprobs[:, c] = poisson.logpmf(X, self.lambdas_[c]).sum(axis=1) return logprobs def _initialize_sufficient_statistics(self): stats = super()._initialize_sufficient_statistics() stats['post'] = np.zeros(self.n_components) stats['obs'] = np.zeros((self.n_components, self.n_features)) return stats def _accumulate_sufficient_statistics(self, stats, obs, lattice, posteriors, fwdlattice, bwdlattice): super()._accumulate_sufficient_statistics( stats, obs, lattice, posteriors, fwdlattice, bwdlattice) if 'l' in self.params: stats['post'] += posteriors.sum(axis=0) stats['obs'] += posteriors.T @ obs def _generate_sample_from_state(self, state, random_state): return random_state.poisson(self.lambdas_[state]) hmmlearn-0.3.0/lib/hmmlearn/_kl_divergence.py000066400000000000000000000067471441755530000212220ustar00rootroot00000000000000""" All implementations are based upon the following: http://www.fil.ion.ucl.ac.uk/~wpenny/publications/densities.ps """ import numpy as np from scipy.special import gammaln, digamma from . 
import _utils def kl_dirichlet(q, p): """ KL Divergence between two dirichlet distributions KL(q || p) = ln [gamma(q)/gamma(p)] - sum [ ln [gamma(q_j)/gamma(p_j)] - (q_j - p_j) (digamma(q_j) - digamma(p_j)] """ q = np.asarray(q) p = np.asarray(p) qsum = q.sum() psum = p.sum() return (gammaln(qsum) - gammaln(psum) - np.sum(gammaln(q) - gammaln(p)) + np.einsum("i,i->", (q - p), (digamma(q) - digamma(qsum)))) def kl_normal_distribution(mean_q, variance_q, mean_p, variance_p): """KL Divergence between two normal distributions.""" result = ((np.log(variance_p / variance_q)) / 2 + ((mean_q - mean_p)**2 + variance_q) / (2 * variance_p) - .5) assert result >= 0, result return result def kl_multivariate_normal_distribution(mean_q, covar_q, mean_p, covar_p): """ KL Divergence of two Multivariate Normal Distribtuions q(x) = Normal(x; mean_q, variance_q) p(x) = Normal(x; mean_p, variance_p) """ # Ensure arrays mean_q = np.asarray(mean_q) covar_q = np.asarray(covar_q) mean_p = np.asarray(mean_p) covar_p = np.asarray(covar_p) # Need the precision of distribution p precision_p = np.linalg.inv(covar_p) mean_diff = mean_q - mean_p D = mean_q.shape[0] # These correspond to the four terms in the ~wpenny paper documented above return .5 * (_utils.logdet(covar_p) - _utils.logdet(covar_q) + np.trace(precision_p @ covar_q) + mean_diff @ precision_p @ mean_diff - D) def kl_gamma_distribution(b_q, c_q, b_p, c_p): """ KL Divergence between two gamma distributions q(x) = Gamma(x; b_q, c_q) p(x) = Gamma(x; b_p, c_p) """ result = ((b_q - b_p) * digamma(b_q) - gammaln(b_q) + gammaln(b_p) + b_p * (np.log(c_q) - np.log(c_p)) + b_q * (c_p-c_q) / c_q) assert result >= 0, result return result def kl_wishart_distribution(dof_q, scale_q, dof_p, scale_p): """ KL Divergence between two Wishart Distributions q(x) = Wishart(R|dof_q, scale_q) p(x) = Wishart(R|dof_p, scale_p) Definition from: Shihao Ji, B. Krishnapuram, and L. Carin, "Variational Bayes for continuous hidden Markov models and its application to active learning," IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 28, no. 4, pp. 522–532, Apr. 2006, doi: 10.1109/TPAMI.2006.85. """ scale_q = np.asarray(scale_q) scale_p = np.asarray(scale_p) D = scale_p.shape[0] return ((dof_q - dof_p)/2 * _E(dof_q, scale_q) - D * dof_q / 2 + dof_q / 2 * np.trace(scale_p @ np.linalg.inv(scale_q)) # Division of logarithm turned into subtraction here + _logZ(dof_p, scale_p) - _logZ(dof_q, scale_q)) def _E(dof, scale): r""" $L(a, B) = \int \mathcal{Wishart}(\Gamma; a, B) \log |\Gamma| d\Gamma$ """ return (-_utils.logdet(scale / 2) + digamma((dof - np.arange(scale.shape[0])) / 2).sum()) def _logZ(dof, scale): D = scale.shape[0] return ((D * (D - 1) / 4) * np.log(np.pi) - dof / 2 * _utils.logdet(scale / 2) + gammaln((dof - np.arange(scale.shape[0])) / 2).sum()) hmmlearn-0.3.0/lib/hmmlearn/_utils.py000066400000000000000000000062701441755530000175500ustar00rootroot00000000000000"""Private utilities.""" import warnings import numpy as np def logdet(a): sign, logdet = np.linalg.slogdet(a) if (sign < 0).any(): warnings.warn("invalid value encountered in log", RuntimeWarning) return np.where(sign < 0, np.nan, logdet) else: return logdet def split_X_lengths(X, lengths): if lengths is None: return [X] else: cs = np.cumsum(lengths) n_samples = len(X) if cs[-1] != n_samples: raise ValueError( f"lengths array {lengths} doesn't sum to {n_samples} samples") return np.split(X, cs)[:-1] # Copied from scikit-learn 0.19. 
def _validate_covars(covars, covariance_type, n_components): """Do basic checks on matrix covariance sizes and values.""" from scipy import linalg if covariance_type == 'spherical': if len(covars) != n_components: raise ValueError("'spherical' covars have length n_components") elif np.any(covars <= 0): raise ValueError("'spherical' covars must be positive") elif covariance_type == 'tied': if covars.shape[0] != covars.shape[1]: raise ValueError("'tied' covars must have shape (n_dim, n_dim)") elif (not np.allclose(covars, covars.T) or np.any(linalg.eigvalsh(covars) <= 0)): raise ValueError("'tied' covars must be symmetric, " "positive-definite") elif covariance_type == 'diag': if len(covars.shape) != 2: raise ValueError("'diag' covars must have shape " "(n_components, n_dim)") elif np.any(covars <= 0): raise ValueError("'diag' covars must be positive") elif covariance_type == 'full': if len(covars.shape) != 3: raise ValueError("'full' covars must have shape " "(n_components, n_dim, n_dim)") elif covars.shape[1] != covars.shape[2]: raise ValueError("'full' covars must have shape " "(n_components, n_dim, n_dim)") for n, cv in enumerate(covars): if (not np.allclose(cv, cv.T) or np.any(linalg.eigvalsh(cv) <= 0)): raise ValueError("component %d of 'full' covars must be " "symmetric, positive-definite" % n) else: raise ValueError("covariance_type must be one of " + "'spherical', 'tied', 'diag', 'full'") # Copied from scikit-learn 0.19. def distribute_covar_matrix_to_match_covariance_type( tied_cv, covariance_type, n_components): """Create all the covariance matrices from a given template.""" if covariance_type == 'spherical': cv = np.tile(tied_cv.mean() * np.ones(tied_cv.shape[1]), (n_components, 1)) elif covariance_type == 'tied': cv = tied_cv elif covariance_type == 'diag': cv = np.tile(np.diag(tied_cv), (n_components, 1)) elif covariance_type == 'full': cv = np.tile(tied_cv, (n_components, 1, 1)) else: raise ValueError("covariance_type must be one of " + "'spherical', 'tied', 'diag', 'full'") return cv hmmlearn-0.3.0/lib/hmmlearn/base.py000066400000000000000000001370331441755530000171650ustar00rootroot00000000000000import logging import string import sys from collections import deque import numpy as np from scipy import linalg, special from sklearn.base import BaseEstimator from sklearn.utils.validation import ( check_array, check_is_fitted, check_random_state) from . import _hmmc, _kl_divergence as _kl, _utils from .utils import normalize, log_normalize _log = logging.getLogger(__name__) #: Supported decoder algorithms. DECODER_ALGORITHMS = frozenset(("viterbi", "map")) class ConvergenceMonitor: """ Monitor and report convergence to :data:`sys.stderr`. Attributes ---------- history : deque The log probability of the data for the last two training iterations. If the values are not strictly increasing, the model did not converge. iter : int Number of iterations performed while training the model. Examples -------- Use custom convergence criteria by subclassing ``ConvergenceMonitor`` and redefining the ``converged`` method. The resulting subclass can be used by creating an instance and pointing a model's ``monitor_`` attribute to it prior to fitting. >>> from hmmlearn.base import ConvergenceMonitor >>> from hmmlearn import hmm >>> >>> class ThresholdMonitor(ConvergenceMonitor): ... @property ... def converged(self): ... return (self.iter == self.n_iter or ... 
self.history[-1] >= self.tol) >>> >>> model = hmm.GaussianHMM(n_components=2, tol=5, verbose=True) >>> model.monitor_ = ThresholdMonitor(model.monitor_.tol, ... model.monitor_.n_iter, ... model.monitor_.verbose) """ _template = "{iter:>10d} {log_prob:>16.8f} {delta:>+16.8f}" def __init__(self, tol, n_iter, verbose): """ Parameters ---------- tol : double Convergence threshold. EM has converged either if the maximum number of iterations is reached or the log probability improvement between the two consecutive iterations is less than threshold. n_iter : int Maximum number of iterations to perform. verbose : bool Whether per-iteration convergence reports are printed. """ self.tol = tol self.n_iter = n_iter self.verbose = verbose self.history = deque() self.iter = 0 def __repr__(self): class_name = self.__class__.__name__ params = sorted(dict(vars(self), history=list(self.history)).items()) return ("{}(\n".format(class_name) + "".join(map(" {}={},\n".format, *zip(*params))) + ")") def _reset(self): """Reset the monitor's state.""" self.iter = 0 self.history.clear() def report(self, log_prob): """ Report convergence to :data:`sys.stderr`. The output consists of three columns: iteration number, log probability of the data at the current iteration and convergence rate. At the first iteration convergence rate is unknown and is thus denoted by NaN. Parameters ---------- log_prob : float The log probability of the data as computed by EM algorithm in the current iteration. """ if self.verbose: delta = log_prob - self.history[-1] if self.history else np.nan message = self._template.format( iter=self.iter + 1, log_prob=log_prob, delta=delta) print(message, file=sys.stderr) # Allow for some wiggleroom based on precision. precision = np.finfo(float).eps ** (1/2) if self.history and (log_prob - self.history[-1]) < -precision: delta = log_prob - self.history[-1] _log.warning(f"Model is not converging. Current: {log_prob}" f" is not greater than {self.history[-1]}." f" Delta is {delta}") self.history.append(log_prob) self.iter += 1 @property def converged(self): """Whether the EM algorithm converged.""" # XXX we might want to check that ``log_prob`` is non-decreasing. return (self.iter == self.n_iter or (len(self.history) >= 2 and self.history[-1] - self.history[-2] < self.tol)) class _AbstractHMM(BaseEstimator): """ Base class for Hidden Markov Models learned via Expectation-Maximization and Variational Bayes. """ def __init__(self, n_components, algorithm, random_state, n_iter, tol, verbose, params, init_params, implementation): """ Parameters ---------- n_components : int Number of states in the model. algorithm : {"viterbi", "map"}, optional Decoder algorithm. random_state: RandomState or an int seed, optional A random number generator instance. n_iter : int, optional Maximum number of iterations to perform. tol : float, optional Convergence threshold. EM will stop if the gain in log-likelihood is below this value. verbose : bool, optional Whether per-iteration convergence reports are printed to :data:`sys.stderr`. Convergence can also be diagnosed using the :attr:`monitor_` attribute. params, init_params : string, optional The parameters that get updated during (``params``) or initialized before (``init_params``) the training. Can contain any combination of 's' for startprob, 't' for transmat, and other characters for subclass-specific emission parameters. Defaults to all parameters. 
implementation: string, optional Determines if the forward-backward algorithm is implemented with logarithms ("log"), or using scaling ("scaling"). The default is to use logarithms for backwards compatability. However, the scaling implementation is generally faster. """ self.n_components = n_components self.params = params self.init_params = init_params self.algorithm = algorithm self.n_iter = n_iter self.tol = tol self.verbose = verbose self.implementation = implementation self.random_state = random_state def score_samples(self, X, lengths=None): """ Compute the log probability under the model and compute posteriors. Parameters ---------- X : array-like, shape (n_samples, n_features) Feature matrix of individual samples. lengths : array-like of integers, shape (n_sequences, ), optional Lengths of the individual sequences in ``X``. The sum of these should be ``n_samples``. Returns ------- log_prob : float Log likelihood of ``X``. posteriors : array, shape (n_samples, n_components) State-membership probabilities for each sample in ``X``. See Also -------- score : Compute the log probability under the model. decode : Find most likely state sequence corresponding to ``X``. """ return self._score(X, lengths, compute_posteriors=True) def score(self, X, lengths=None): """ Compute the log probability under the model. Parameters ---------- X : array-like, shape (n_samples, n_features) Feature matrix of individual samples. lengths : array-like of integers, shape (n_sequences, ), optional Lengths of the individual sequences in ``X``. The sum of these should be ``n_samples``. Returns ------- log_prob : float Log likelihood of ``X``. See Also -------- score_samples : Compute the log probability under the model and posteriors. decode : Find most likely state sequence corresponding to ``X``. """ return self._score(X, lengths, compute_posteriors=False)[0] def _score(self, X, lengths=None, *, compute_posteriors): """ Helper for `score` and `score_samples`. Compute the log probability under the model, as well as posteriors if *compute_posteriors* is True (otherwise, an empty array is returned for the latter). """ check_is_fitted(self, "startprob_") self._check() X = check_array(X) impl = { "scaling": self._score_scaling, "log": self._score_log, }[self.implementation] return impl( X=X, lengths=lengths, compute_posteriors=compute_posteriors) def _score_log(self, X, lengths=None, *, compute_posteriors): """ Compute the log probability under the model, as well as posteriors if *compute_posteriors* is True (otherwise, an empty array is returned for the latter). 
""" log_prob = 0 sub_posteriors = [np.empty((0, self.n_components))] for sub_X in _utils.split_X_lengths(X, lengths): log_frameprob = self._compute_log_likelihood(sub_X) log_probij, fwdlattice = _hmmc.forward_log( self.startprob_, self.transmat_, log_frameprob) log_prob += log_probij if compute_posteriors: bwdlattice = _hmmc.backward_log( self.startprob_, self.transmat_, log_frameprob) sub_posteriors.append( self._compute_posteriors_log(fwdlattice, bwdlattice)) return log_prob, np.concatenate(sub_posteriors) def _score_scaling(self, X, lengths=None, *, compute_posteriors): log_prob = 0 sub_posteriors = [np.empty((0, self.n_components))] for sub_X in _utils.split_X_lengths(X, lengths): frameprob = self._compute_likelihood(sub_X) log_probij, fwdlattice, scaling_factors = _hmmc.forward_scaling( self.startprob_, self.transmat_, frameprob) log_prob += log_probij if compute_posteriors: bwdlattice = _hmmc.backward_scaling( self.startprob_, self.transmat_, frameprob, scaling_factors) sub_posteriors.append( self._compute_posteriors_scaling(fwdlattice, bwdlattice)) return log_prob, np.concatenate(sub_posteriors) def _decode_viterbi(self, X): log_frameprob = self._compute_log_likelihood(X) return _hmmc.viterbi(self.startprob_, self.transmat_, log_frameprob) def _decode_map(self, X): _, posteriors = self.score_samples(X) log_prob = np.max(posteriors, axis=1).sum() state_sequence = np.argmax(posteriors, axis=1) return log_prob, state_sequence def decode(self, X, lengths=None, algorithm=None): """ Find most likely state sequence corresponding to ``X``. Parameters ---------- X : array-like, shape (n_samples, n_features) Feature matrix of individual samples. lengths : array-like of integers, shape (n_sequences, ), optional Lengths of the individual sequences in ``X``. The sum of these should be ``n_samples``. algorithm : string Decoder algorithm. Must be one of "viterbi" or "map". If not given, :attr:`decoder` is used. Returns ------- log_prob : float Log probability of the produced state sequence. state_sequence : array, shape (n_samples, ) Labels for each sample from ``X`` obtained via a given decoder ``algorithm``. See Also -------- score_samples : Compute the log probability under the model and posteriors. score : Compute the log probability under the model. """ check_is_fitted(self, "startprob_") self._check() algorithm = algorithm or self.algorithm if algorithm not in DECODER_ALGORITHMS: raise ValueError(f"Unknown decoder {algorithm!r}") decoder = { "viterbi": self._decode_viterbi, "map": self._decode_map }[algorithm] X = check_array(X) log_prob = 0 sub_state_sequences = [] for sub_X in _utils.split_X_lengths(X, lengths): # XXX decoder works on a single sample at a time! sub_log_prob, sub_state_sequence = decoder(sub_X) log_prob += sub_log_prob sub_state_sequences.append(sub_state_sequence) return log_prob, np.concatenate(sub_state_sequences) def predict(self, X, lengths=None): """ Find most likely state sequence corresponding to ``X``. Parameters ---------- X : array-like, shape (n_samples, n_features) Feature matrix of individual samples. lengths : array-like of integers, shape (n_sequences, ), optional Lengths of the individual sequences in ``X``. The sum of these should be ``n_samples``. Returns ------- state_sequence : array, shape (n_samples, ) Labels for each sample from ``X``. """ _, state_sequence = self.decode(X, lengths) return state_sequence def predict_proba(self, X, lengths=None): """ Compute the posterior probability for each state in the model. 
Parameters ---------- X : array-like, shape (n_samples, n_features) Feature matrix of individual samples. lengths : array-like of integers, shape (n_sequences, ), optional Lengths of the individual sequences in ``X``. The sum of these should be ``n_samples``. Returns ------- posteriors : array, shape (n_samples, n_components) State-membership probabilities for each sample from ``X``. """ _, posteriors = self.score_samples(X, lengths) return posteriors def sample(self, n_samples=1, random_state=None, currstate=None): """ Generate random samples from the model. Parameters ---------- n_samples : int Number of samples to generate. random_state : RandomState or an int seed A random number generator instance. If ``None``, the object's ``random_state`` is used. currstate : int Current state, as the initial state of the samples. Returns ------- X : array, shape (n_samples, n_features) Feature matrix. state_sequence : array, shape (n_samples, ) State sequence produced by the model. Examples -------- :: # generate samples continuously _, Z = model.sample(n_samples=10) X, Z = model.sample(n_samples=10, currstate=Z[-1]) """ check_is_fitted(self, "startprob_") self._check() if random_state is None: random_state = self.random_state random_state = check_random_state(random_state) transmat_cdf = np.cumsum(self.transmat_, axis=1) if currstate is None: startprob_cdf = np.cumsum(self.startprob_) currstate = (startprob_cdf > random_state.rand()).argmax() state_sequence = [currstate] X = [self._generate_sample_from_state( currstate, random_state=random_state)] for t in range(n_samples - 1): currstate = ( (transmat_cdf[currstate] > random_state.rand()).argmax()) state_sequence.append(currstate) X.append(self._generate_sample_from_state( currstate, random_state=random_state)) return np.atleast_2d(X), np.array(state_sequence, dtype=int) def fit(self, X, lengths=None): """ Estimate model parameters. An initialization step is performed before entering the EM algorithm. If you want to avoid this step for a subset of the parameters, pass proper ``init_params`` keyword argument to estimator's constructor. Parameters ---------- X : array-like, shape (n_samples, n_features) Feature matrix of individual samples. lengths : array-like of integers, shape (n_sequences, ) Lengths of the individual sequences in ``X``. The sum of these should be ``n_samples``. Returns ------- self : object Returns self. """ X = check_array(X) if lengths is None: lengths = np.asarray([X.shape[0]]) self._init(X, lengths) self._check() self.monitor_._reset() for iter in range(self.n_iter): stats, curr_logprob = self._do_estep(X, lengths) # Compute lower bound before updating model parameters lower_bound = self._compute_lower_bound(curr_logprob) # XXX must be before convergence check, because otherwise # there won't be any updates for the case ``n_iter=1``. 
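            # Note that ``lower_bound`` was computed from the parameters as
            # they stood *before* the M-step below, so the value reported to
            # the monitor describes the model at the start of this iteration.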
self._do_mstep(stats) self.monitor_.report(lower_bound) if self.monitor_.converged: break if (self.transmat_.sum(axis=1) == 0).any(): _log.warning("Some rows of transmat_ have zero sum because no " "transition from the state was ever observed.") return self def _fit_scaling(self, X): raise NotImplementedError("Must be overridden in subclass") def _fit_log(self, X): raise NotImplementedError("Must be overridden in subclass") def _compute_posteriors_scaling(self, fwdlattice, bwdlattice): posteriors = fwdlattice * bwdlattice normalize(posteriors, axis=1) return posteriors def _compute_posteriors_log(self, fwdlattice, bwdlattice): # gamma is guaranteed to be correctly normalized by log_prob at # all frames, unless we do approximate inference using pruning. # So, we will normalize each frame explicitly in case we # pruned too aggressively. log_gamma = fwdlattice + bwdlattice log_normalize(log_gamma, axis=1) with np.errstate(under="ignore"): return np.exp(log_gamma) def _needs_init(self, code, name): if code in self.init_params: if hasattr(self, name): _log.warning( "Even though the %r attribute is set, it will be " "overwritten during initialization because 'init_params' " "contains %r", name, code) return True if not hasattr(self, name): return True return False def _check_and_set_n_features(self, X): _, n_features = X.shape if hasattr(self, "n_features"): if self.n_features != n_features: raise ValueError( f"Unexpected number of dimensions, got {n_features} but " f"expected {self.n_features}") else: self.n_features = n_features def _get_n_fit_scalars_per_param(self): """ Return a mapping of fittable parameter names (as in ``self.params``) to the number of corresponding scalar parameters that will actually be fitted. This is used to detect whether the user did not pass enough data points for a non-degenerate fit. """ raise NotImplementedError("Must be overridden in subclass") def _check_sum_1(self, name): """Check that an array describes one or more distributions.""" s = getattr(self, name).sum(axis=-1) if not np.allclose(s, 1): raise ValueError( f"{name} must sum to 1 (got {s:.4f})" if s.ndim == 0 else f"{name} rows must sum to 1 (got {s})" if s.ndim == 1 else "Expected 1D or 2D array") def _check(self): """ Validate model parameters prior to fitting. Raises ------ ValueError If any of the parameters are invalid, e.g. if :attr:`startprob_` don't sum to 1. """ raise NotImplementedError("Must be overridden in subclass") def _compute_likelihood(self, X): """ Compute per-component probability under the model. Parameters ---------- X : array-like, shape (n_samples, n_features) Feature matrix of individual samples. Returns ------- log_prob : array, shape (n_samples, n_components) Log probability of each sample in ``X`` for each of the model states. """ if (self._compute_log_likelihood # prevent recursion != __class__._compute_log_likelihood.__get__(self)): # Probabilities equal to zero do occur, and exp(-LARGE) = 0 is OK. with np.errstate(under="ignore"): return np.exp(self._compute_log_likelihood(X)) else: raise NotImplementedError("Must be overridden in subclass") def _compute_log_likelihood(self, X): """ Compute per-component emission log probability under the model. Parameters ---------- X : array-like, shape (n_samples, n_features) Feature matrix of individual samples. Returns ------- log_prob : array, shape (n_samples, n_components) Emission log probability of each sample in ``X`` for each of the model states, i.e., ``log(p(X|state))``. 
""" if (self._compute_likelihood # prevent recursion != __class__._compute_likelihood.__get__(self)): # Probabilities equal to zero do occur, and log(0) = -inf is OK. likelihood = self._compute_likelihood(X) with np.errstate(divide="ignore"): return np.log(likelihood) else: raise NotImplementedError("Must be overridden in subclass") def _generate_sample_from_state(self, state, random_state): """ Generate a random sample from a given component. Parameters ---------- state : int Index of the component to condition on. random_state: RandomState A random number generator instance. (`sample` is the only caller for this method and already normalizes *random_state*.) Returns ------- X : array, shape (n_features, ) A random sample from the emission distribution corresponding to a given component. """ return () def _initialize_sufficient_statistics(self): """ Initialize sufficient statistics required for M-step. The method is *pure*, meaning that it doesn't change the state of the instance. For extensibility computed statistics are stored in a dictionary. Returns ------- nobs : int Number of samples in the data. start : array, shape (n_components, ) An array where the i-th element corresponds to the posterior probability of the first sample being generated by the i-th state. trans : array, shape (n_components, n_components) An array where the (i, j)-th element corresponds to the posterior probability of transitioning between the i-th to j-th states. """ stats = {'nobs': 0, 'start': np.zeros(self.n_components), 'trans': np.zeros((self.n_components, self.n_components))} return stats def _accumulate_sufficient_statistics( self, stats, X, lattice, posteriors, fwdlattice, bwdlattice): """ Update sufficient statistics from a given sample. Parameters ---------- stats : dict Sufficient statistics as returned by :meth:`~.BaseHMM._initialize_sufficient_statistics`. X : array, shape (n_samples, n_features) Sample sequence. lattice : array, shape (n_samples, n_components) Probabilities OR Log Probabilities of each sample under each of the model states. Depends on the choice of implementation of the Forward-Backward algorithm posteriors : array, shape (n_samples, n_components) Posterior probabilities of each sample being generated by each of the model states. fwdlattice, bwdlattice : array, shape (n_samples, n_components) forward and backward probabilities. """ impl = { "scaling": self._accumulate_sufficient_statistics_scaling, "log": self._accumulate_sufficient_statistics_log, }[self.implementation] return impl(stats=stats, X=X, lattice=lattice, posteriors=posteriors, fwdlattice=fwdlattice, bwdlattice=bwdlattice) def _accumulate_sufficient_statistics_scaling( self, stats, X, lattice, posteriors, fwdlattice, bwdlattice): """ Implementation of `_accumulate_sufficient_statistics` for ``implementation = "log"``. """ stats['nobs'] += 1 if 's' in self.params: stats['start'] += posteriors[0] if 't' in self.params: n_samples, n_components = lattice.shape # when the sample is of length 1, it contains no transitions # so there is no reason to update our trans. matrix estimate if n_samples <= 1: return xi_sum = _hmmc.compute_scaling_xi_sum( fwdlattice, self.transmat_, bwdlattice, lattice) stats['trans'] += xi_sum def _accumulate_sufficient_statistics_log( self, stats, X, lattice, posteriors, fwdlattice, bwdlattice): """ Implementation of `_accumulate_sufficient_statistics` for ``implementation = "log"``. 
""" stats['nobs'] += 1 if 's' in self.params: stats['start'] += posteriors[0] if 't' in self.params: n_samples, n_components = lattice.shape # when the sample is of length 1, it contains no transitions # so there is no reason to update our trans. matrix estimate if n_samples <= 1: return log_xi_sum = _hmmc.compute_log_xi_sum( fwdlattice, self.transmat_, bwdlattice, lattice) with np.errstate(under="ignore"): stats['trans'] += np.exp(log_xi_sum) def _do_mstep(self, stats): """ Perform the M-step of EM algorithm. Parameters ---------- stats : dict Sufficient statistics updated from all available samples. """ def _do_estep(self, X, lengths): impl = { "scaling": self._fit_scaling, "log": self._fit_log, }[self.implementation] stats = self._initialize_sufficient_statistics() self._estep_begin() curr_logprob = 0 for sub_X in _utils.split_X_lengths(X, lengths): lattice, logprob, posteriors, fwdlattice, bwdlattice = impl(sub_X) # Derived HMM classes will implement the following method to # update their probability distributions, so keep # a single call to this method for simplicity. self._accumulate_sufficient_statistics( stats, sub_X, lattice, posteriors, fwdlattice, bwdlattice) curr_logprob += logprob return stats, curr_logprob def _estep_begin(self): pass def _compute_lower_bound(self, curr_logprob): raise NotImplementedError("Must be overridden in subclass") class BaseHMM(_AbstractHMM): """ Base class for Hidden Markov Models learned from Expectation-Maximization. This class allows for easy evaluation of, sampling from, and maximum a posteriori estimation of the parameters of a HMM. Attributes ---------- monitor_ : ConvergenceMonitor Monitor object used to check the convergence of EM. startprob_ : array, shape (n_components, ) Initial state occupation distribution. transmat_ : array, shape (n_components, n_components) Matrix of transition probabilities between states. Notes ----- Normally, one should use a subclass of `.BaseHMM`, with its specialization towards a given emission model. In rare cases, the base class can also be useful in itself, if one simply wants to generate a sequence of states using `.BaseHMM.sample`. In that case, the feature matrix will have zero features. """ def __init__(self, n_components=1, startprob_prior=1.0, transmat_prior=1.0, algorithm="viterbi", random_state=None, n_iter=10, tol=1e-2, verbose=False, params=string.ascii_letters, init_params=string.ascii_letters, implementation="log"): """ Parameters ---------- n_components : int Number of states in the model. startprob_prior : array, shape (n_components, ), optional Parameters of the Dirichlet prior distribution for :attr:`startprob_`. transmat_prior : array, shape (n_components, n_components), optional Parameters of the Dirichlet prior distribution for each row of the transition probabilities :attr:`transmat_`. algorithm : {"viterbi", "map"}, optional Decoder algorithm. random_state: RandomState or an int seed, optional A random number generator instance. n_iter : int, optional Maximum number of iterations to perform. tol : float, optional Convergence threshold. EM will stop if the gain in log-likelihood is below this value. verbose : bool, optional Whether per-iteration convergence reports are printed to :data:`sys.stderr`. Convergence can also be diagnosed using the :attr:`monitor_` attribute. params, init_params : string, optional The parameters that get updated during (``params``) or initialized before (``init_params``) the training. 
Can contain any combination of 's' for startprob, 't' for transmat, and other characters for subclass-specific emission parameters. Defaults to all parameters. implementation: string, optional Determines if the forward-backward algorithm is implemented with logarithms ("log"), or using scaling ("scaling"). The default is to use logarithms for backwards compatability. However, the scaling implementation is generally faster. """ super().__init__( n_components=n_components, algorithm=algorithm, random_state=random_state, n_iter=n_iter, tol=tol, verbose=verbose, params=params, init_params=init_params, implementation=implementation) self.startprob_prior = startprob_prior self.transmat_prior = transmat_prior self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, self.verbose) def get_stationary_distribution(self): """Compute the stationary distribution of states.""" # The stationary distribution is proportional to the left-eigenvector # associated with the largest eigenvalue (i.e., 1) of the transition # matrix. check_is_fitted(self, "transmat_") eigvals, eigvecs = linalg.eig(self.transmat_.T) eigvec = np.real_if_close(eigvecs[:, np.argmax(eigvals)]) return eigvec / eigvec.sum() def _fit_scaling(self, X): frameprob = self._compute_likelihood(X) log_prob, fwdlattice, scaling_factors = _hmmc.forward_scaling( self.startprob_, self.transmat_, frameprob) bwdlattice = _hmmc.backward_scaling( self.startprob_, self.transmat_, frameprob, scaling_factors) posteriors = self._compute_posteriors_scaling(fwdlattice, bwdlattice) return frameprob, log_prob, posteriors, fwdlattice, bwdlattice def _fit_log(self, X): log_frameprob = self._compute_log_likelihood(X) log_prob, fwdlattice = _hmmc.forward_log( self.startprob_, self.transmat_, log_frameprob) bwdlattice = _hmmc.backward_log( self.startprob_, self.transmat_, log_frameprob) posteriors = self._compute_posteriors_log(fwdlattice, bwdlattice) return log_frameprob, log_prob, posteriors, fwdlattice, bwdlattice def _do_mstep(self, stats): """ Perform the M-step of EM algorithm. Parameters ---------- stats : dict Sufficient statistics updated from all available samples. """ # If a prior is < 1, `prior - 1 + starts['start']` can be negative. In # that case maximization of (n1+e1) log p1 + ... + (ns+es) log ps under # the conditions sum(p) = 1 and all(p >= 0) show that the negative # terms can just be set to zero. # The ``np.where`` calls guard against updating forbidden states # or transitions in e.g. a left-right HMM. if 's' in self.params: startprob_ = np.maximum(self.startprob_prior - 1 + stats['start'], 0) self.startprob_ = np.where(self.startprob_ == 0, 0, startprob_) normalize(self.startprob_) if 't' in self.params: transmat_ = np.maximum(self.transmat_prior - 1 + stats['trans'], 0) self.transmat_ = np.where(self.transmat_ == 0, 0, transmat_) normalize(self.transmat_, axis=1) def _compute_lower_bound(self, curr_logprob): return curr_logprob def _init(self, X, lengths=None): """ Initialize model parameters prior to fitting. Parameters ---------- X : array-like, shape (n_samples, n_features) Feature matrix of individual samples. """ self._check_and_set_n_features(X) init = 1. 
/ self.n_components random_state = check_random_state(self.random_state) if self._needs_init("s", "startprob_"): self.startprob_ = random_state.dirichlet( np.full(self.n_components, init)) if self._needs_init("t", "transmat_"): self.transmat_ = random_state.dirichlet( np.full(self.n_components, init), size=self.n_components) n_fit_scalars_per_param = self._get_n_fit_scalars_per_param() if n_fit_scalars_per_param is not None: n_fit_scalars = sum( n_fit_scalars_per_param[p] for p in self.params) if X.size < n_fit_scalars: _log.warning( "Fitting a model with %d free scalar parameters with only " "%d data points will result in a degenerate solution.", n_fit_scalars, X.size) def _check_sum_1(self, name): """Check that an array describes one or more distributions.""" s = getattr(self, name).sum(axis=-1) if not np.allclose(s, 1): raise ValueError( f"{name} must sum to 1 (got {s:.4f})" if s.ndim == 0 else f"{name} rows must sum to 1 (got {s})" if s.ndim == 1 else "Expected 1D or 2D array") def _check(self): """ Validate model parameters prior to fitting. Raises ------ ValueError If any of the parameters are invalid, e.g. if :attr:`startprob_` don't sum to 1. """ self.startprob_ = np.asarray(self.startprob_) if len(self.startprob_) != self.n_components: raise ValueError("startprob_ must have length n_components") self._check_sum_1("startprob_") self.transmat_ = np.asarray(self.transmat_) if self.transmat_.shape != (self.n_components, self.n_components): raise ValueError( "transmat_ must have shape (n_components, n_components)") self._check_sum_1("transmat_") def aic(self, X, lengths=None): """ Akaike information criterion for the current model on the input X. AIC = -2*logLike + 2 * num_free_params https://en.wikipedia.org/wiki/Akaike_information_criterion Parameters ---------- X : array of shape (n_samples, n_dimensions) The input samples. lengths : array-like of integers, shape (n_sequences, ) Lengths of the individual sequences in ``X``. The sum of these should be ``n_samples``. Returns ------- aic : float The lower the better. """ n_params = sum(self._get_n_fit_scalars_per_param().values()) return -2 * self.score(X, lengths=lengths) + 2 * n_params def bic(self, X, lengths=None): """ Bayesian information criterion for the current model on the input X. BIC = -2*logLike + num_free_params * log(num_of_data) https://en.wikipedia.org/wiki/Bayesian_information_criterion Parameters ---------- X : array of shape (n_samples, n_dimensions) The input samples. lengths : array-like of integers, shape (n_sequences, ) Lengths of the individual sequences in ``X``. The sum of these should be ``n_samples``. Returns ------- bic : float The lower the better. """ n_params = sum(self._get_n_fit_scalars_per_param().values()) return -2 * self.score(X, lengths=lengths) + n_params * np.log(len(X)) _BaseHMM = BaseHMM # Backcompat name, will be deprecated in the future. 
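# A minimal usage sketch, kept as a comment so that importing this module has
# no side effects.  It assumes one of the concrete subclasses from
# ``hmmlearn.hmm`` (here ``GaussianHMM``) and a feature matrix ``X`` of shape
# (n_samples, n_features); both names are placeholders, not part of this
# module.  It illustrates how the ``aic``/``bic`` helpers defined above can be
# used to compare candidate numbers of hidden states:
#
#     from hmmlearn.hmm import GaussianHMM
#     candidates = [GaussianHMM(n_components=n, n_iter=10).fit(X)
#                   for n in (2, 3, 4)]
#     best = min(candidates, key=lambda model: model.bic(X))
#
# Lower AIC/BIC values indicate a better trade-off between goodness of fit and
# the number of free parameters.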
class VariationalBaseHMM(_AbstractHMM): def __init__(self, n_components=1, startprob_prior=None, transmat_prior=None, algorithm="viterbi", random_state=None, n_iter=100, tol=1e-6, verbose=False, params="ste", init_params="ste", implementation="log"): super().__init__( n_components=n_components, algorithm=algorithm, random_state=random_state, n_iter=n_iter, tol=tol, verbose=verbose, params=params, init_params=init_params, implementation=implementation) self.startprob_prior = startprob_prior self.transmat_prior = transmat_prior self.monitor_ = ConvergenceMonitor( self.tol, self.n_iter, self.verbose) def _init(self, X, lengths=None): """ Initialize model parameters prior to fitting. Parameters ---------- X : array-like, shape (n_samples, n_features) Feature matrix of individual samples. lengths : array-like of integers, shape (n_sequences, ) Lengths of the individual sequences in ``X``. The sum of these should be ``n_samples``. """ self._check_and_set_n_features(X) nc = self.n_components uniform_prior = 1 / nc random_state = check_random_state(self.random_state) if (self._needs_init("s", "startprob_posterior_") or self._needs_init("s", "startprob_prior_")): if self.startprob_prior is None: startprob_init = uniform_prior else: startprob_init = self.startprob_prior self.startprob_prior_ = np.full(nc, startprob_init) self.startprob_posterior_ = random_state.dirichlet( np.full(nc, uniform_prior)) * len(lengths) if (self._needs_init("t", "transmat_posterior_") or self._needs_init("t", "transmat_prior_")): if self.transmat_prior is None: transmat_init = uniform_prior else: transmat_init = self.transmat_prior self.transmat_prior_ = np.full( (nc, nc), transmat_init) self.transmat_posterior_ = random_state.dirichlet( np.full(nc, uniform_prior), size=nc) self.transmat_posterior_ *= sum(lengths) / nc n_fit_scalars_per_param = self._get_n_fit_scalars_per_param() if n_fit_scalars_per_param is not None: n_fit_scalars = sum( n_fit_scalars_per_param[p] for p in self.params) if X.size < n_fit_scalars: _log.warning( "Fitting a model with %d free scalar parameters with only " "%d data points will result in a degenerate solution.", n_fit_scalars, X.size) # For Variational Inference, we compute the forward/backward algorithm # using subnormalized probabilities. def _fit_scaling(self, X): frameprob = self._compute_subnorm_likelihood(X) logprob, fwdlattice, scaling_factors = _hmmc.forward_scaling( self.startprob_subnorm_, self.transmat_subnorm_, frameprob) bwdlattice = _hmmc.backward_scaling( self.startprob_subnorm_, self.transmat_subnorm_, frameprob, scaling_factors) posteriors = self._compute_posteriors_scaling(fwdlattice, bwdlattice) return frameprob, logprob, posteriors, fwdlattice, bwdlattice def _fit_log(self, X): framelogprob = self._compute_subnorm_log_likelihood(X) logprob, fwdlattice = _hmmc.forward_log( self.startprob_subnorm_, self.transmat_subnorm_, framelogprob) bwdlattice = _hmmc.backward_log( self.startprob_subnorm_, self.transmat_subnorm_, framelogprob) posteriors = self._compute_posteriors_log(fwdlattice, bwdlattice) return framelogprob, logprob, posteriors, fwdlattice, bwdlattice def _check(self): """ Validate model parameters prior to fitting. Raises ------ ValueError If any of the parameters are invalid, e.g. if :attr:`startprob_` don't sum to 1. 
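For this variational wrapper the validation covers the shapes of the
        Dirichlet prior and posterior arrays, i.e. :attr:`startprob_prior_`,
        :attr:`startprob_posterior_`, :attr:`transmat_prior_` and
        :attr:`transmat_posterior_` (see the body below).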
""" nc = self.n_components self.startprob_prior_ = np.asarray(self.startprob_prior_) if len(self.startprob_prior_) != nc: raise ValueError("startprob_prior_ must have length n_components") self.startprob_posterior_ = np.asarray(self.startprob_posterior_) if len(self.startprob_posterior_) != nc: raise ValueError("startprob_posterior_ must have length " "n_components") self.transmat_prior_ = np.asarray(self.transmat_prior_) if self.transmat_prior_.shape != (nc, nc): raise ValueError("transmat_prior_ must have shape " "(n_components, n_components)") self.transmat_posterior_ = np.asarray(self.transmat_posterior_) if self.transmat_posterior_.shape != (nc, nc): raise ValueError("transmat_posterior_ must have shape " "(n_components, n_components)") def _compute_subnorm_likelihood(self, X): if (self._compute_subnorm_log_likelihood != # prevent recursion __class__._compute_subnorm_log_likelihood.__get__(self)): return np.exp(self._compute_subnorm_log_likelihood(X)) else: raise NotImplementedError("Must be overridden in subclass") def _compute_subnorm_log_likelihood(self, X): if (self._compute_subnorm_likelihood != # prevent recursion __class__._compute_subnorm_likelihood.__get__(self)): return np.log(self._compute_subnorm_likelihood(X)) else: raise NotImplementedError("Must be overridden in subclass") def _accumulate_sufficient_statistics_scaling( self, stats, X, lattice, posteriors, fwdlattice, bwdlattice): """ Implementation of `_accumulate_sufficient_statistics` for ``implementation = "log"``. """ stats['nobs'] += 1 if 's' in self.params: stats['start'] += posteriors[0] if 't' in self.params: n_samples, n_components = lattice.shape # when the sample is of length 1, it contains no transitions # so there is no reason to update our trans. matrix estimate if n_samples <= 1: return xi_sum = _hmmc.compute_scaling_xi_sum(fwdlattice, self.transmat_subnorm_, bwdlattice, lattice) stats['trans'] += xi_sum def _accumulate_sufficient_statistics_log( self, stats, X, lattice, posteriors, fwdlattice, bwdlattice): """ Implementation of `_accumulate_sufficient_statistics` for ``implementation = "log"``. """ stats['nobs'] += 1 if 's' in self.params: stats['start'] += posteriors[0] if 't' in self.params: n_samples, n_components = lattice.shape # when the sample is of length 1, it contains no transitions # so there is no reason to update our trans. matrix estimate if n_samples <= 1: return log_xi_sum = _hmmc.compute_log_xi_sum( fwdlattice, self.transmat_subnorm_, bwdlattice, lattice) with np.errstate(under="ignore"): stats['trans'] += np.exp(log_xi_sum) def _estep_begin(self): """ Update the subnormalized model parameters. Called at the beginning of each iteration of fit() """ startprob_log_subnorm = ( special.digamma(self.startprob_posterior_) - special.digamma(self.startprob_posterior_.sum())) self.startprob_subnorm_ = np.exp(startprob_log_subnorm) transmat_log_subnorm = ( special.digamma(self.transmat_posterior_) - special.digamma(self.transmat_posterior_.sum(axis=1)[:, None])) self.transmat_subnorm_ = np.exp(transmat_log_subnorm) def _do_mstep(self, stats): """ Perform the M-step of EM algorithm. Parameters ---------- stats : dict Sufficient statistics updated from all available samples. 
""" if 's' in self.params: self.startprob_posterior_ = self.startprob_prior_ + stats['start'] # For compatability in _AbstractHMM self.startprob_ = (self.startprob_posterior_ / self.startprob_posterior_.sum()) if 't' in self.params: self.transmat_posterior_ = self.transmat_prior_ + stats['trans'] # For compatability in _AbstractHMM self.transmat_ = (self.transmat_posterior_ / self.transmat_posterior_.sum(axis=1)[:, None]) def _compute_lower_bound(self, curr_logprob): """ Compute the Variational Lower Bound of the model as currently configured. Following the pattern elsewhere, derived implementations should call this method to get the contribution of the current log_prob, transmat, and startprob towards the lower bound Parameters ---------- curr_logprob : float The current log probability of the data as computed at the subnormalized model parameters. Returns ------- lower_bound: float Returns the computed lower bound contribution of the log_prob, startprob, and transmat. """ # Get the contribution from the state transitions, # initial probabilities, and the likelihood of the sequences startprob_lower_bound = -_kl.kl_dirichlet( self.startprob_posterior_, self.startprob_prior_) transmat_lower_bound = 0 for i in range(self.n_components): transmat_lower_bound -= _kl.kl_dirichlet( self.transmat_posterior_[i], self.transmat_prior_[i]) return startprob_lower_bound + transmat_lower_bound + curr_logprob hmmlearn-0.3.0/lib/hmmlearn/hmm.py000066400000000000000000001220771441755530000170360ustar00rootroot00000000000000""" The :mod:`hmmlearn.hmm` module implements hidden Markov models. """ import logging import numpy as np from scipy import linalg from sklearn import cluster from sklearn.utils import check_random_state from . import _emissions, _utils from .base import BaseHMM from .utils import fill_covars, normalize __all__ = [ "GMMHMM", "GaussianHMM", "CategoricalHMM", "MultinomialHMM", "PoissonHMM", ] _log = logging.getLogger(__name__) COVARIANCE_TYPES = frozenset(("spherical", "diag", "full", "tied")) class CategoricalHMM(_emissions.BaseCategoricalHMM, BaseHMM): """ Hidden Markov Model with categorical (discrete) emissions. Attributes ---------- n_features : int Number of possible symbols emitted by the model (in the samples). monitor_ : ConvergenceMonitor Monitor object used to check the convergence of EM. startprob_ : array, shape (n_components, ) Initial state occupation distribution. transmat_ : array, shape (n_components, n_components) Matrix of transition probabilities between states. emissionprob_ : array, shape (n_components, n_features) Probability of emitting a given symbol when in each state. Examples -------- >>> from hmmlearn.hmm import CategoricalHMM >>> CategoricalHMM(n_components=2) #doctest: +ELLIPSIS CategoricalHMM(algorithm='viterbi',... """ def __init__(self, n_components=1, startprob_prior=1.0, transmat_prior=1.0, *, emissionprob_prior=1.0, n_features=None, algorithm="viterbi", random_state=None, n_iter=10, tol=1e-2, verbose=False, params="ste", init_params="ste", implementation="log"): """ Parameters ---------- n_components : int Number of states. startprob_prior : array, shape (n_components, ), optional Parameters of the Dirichlet prior distribution for :attr:`startprob_`. transmat_prior : array, shape (n_components, n_components), optional Parameters of the Dirichlet prior distribution for each row of the transition probabilities :attr:`transmat_`. 
emissionprob_prior : array, shape (n_components, n_features), optional Parameters of the Dirichlet prior distribution for :attr:`emissionprob_`. n_features: int, optional The number of categorical symbols in the HMM. Will be inferred from the data if not set. algorithm : {"viterbi", "map"}, optional Decoder algorithm. random_state: RandomState or an int seed, optional A random number generator instance. n_iter : int, optional Maximum number of iterations to perform. tol : float, optional Convergence threshold. EM will stop if the gain in log-likelihood is below this value. verbose : bool, optional Whether per-iteration convergence reports are printed to :data:`sys.stderr`. Convergence can also be diagnosed using the :attr:`monitor_` attribute. params, init_params : string, optional The parameters that get updated during (``params``) or initialized before (``init_params``) the training. Can contain any combination of 's' for startprob, 't' for transmat, and 'e' for emissionprob. Defaults to all parameters. implementation : string, optional Determines if the forward-backward algorithm is implemented with logarithms ("log"), or using scaling ("scaling"). The default is to use logarithms for backwards compatability. """ BaseHMM.__init__(self, n_components, startprob_prior=startprob_prior, transmat_prior=transmat_prior, algorithm=algorithm, random_state=random_state, n_iter=n_iter, tol=tol, verbose=verbose, params=params, init_params=init_params, implementation=implementation) self.emissionprob_prior = emissionprob_prior self.n_features = n_features def _init(self, X, lengths=None): super()._init(X, lengths) self.random_state = check_random_state(self.random_state) if self._needs_init('e', 'emissionprob_'): self.emissionprob_ = self.random_state.rand( self.n_components, self.n_features) normalize(self.emissionprob_, axis=1) def _check(self): super()._check() self.emissionprob_ = np.atleast_2d(self.emissionprob_) if self.n_features is None: self.n_features = self.emissionprob_.shape[1] if self.emissionprob_.shape != (self.n_components, self.n_features): raise ValueError( f"emissionprob_ must have shape" f"({self.n_components}, {self.n_features})") self._check_sum_1("emissionprob_") def _do_mstep(self, stats): super()._do_mstep(stats) if 'e' in self.params: self.emissionprob_ = np.maximum( self.emissionprob_prior - 1 + stats['obs'], 0) normalize(self.emissionprob_, axis=1) class GaussianHMM(_emissions.BaseGaussianHMM, BaseHMM): """ Hidden Markov Model with Gaussian emissions. Attributes ---------- n_features : int Dimensionality of the Gaussian emissions. monitor_ : ConvergenceMonitor Monitor object used to check the convergence of EM. startprob_ : array, shape (n_components, ) Initial state occupation distribution. transmat_ : array, shape (n_components, n_components) Matrix of transition probabilities between states. means_ : array, shape (n_components, n_features) Mean parameters for each state. covars_ : array Covariance parameters for each state. The shape depends on :attr:`covariance_type`: * (n_components, ) if "spherical", * (n_components, n_features) if "diag", * (n_components, n_features, n_features) if "full", * (n_features, n_features) if "tied". Examples -------- >>> from hmmlearn.hmm import GaussianHMM >>> GaussianHMM(n_components=2) #doctest: +ELLIPSIS GaussianHMM(algorithm='viterbi',... 
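A minimal fitting sketch; the random data and parameter values below are
    illustrative placeholders only:

    >>> import numpy as np
    >>> X = np.random.randn(100, 2)              # 100 samples, 2 features
    >>> model = GaussianHMM(n_components=2, n_iter=5)
    >>> model = model.fit(X)                     # doctest: +SKIP
    >>> hidden_states = model.predict(X)         # doctest: +SKIP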
""" def __init__(self, n_components=1, covariance_type='diag', min_covar=1e-3, startprob_prior=1.0, transmat_prior=1.0, means_prior=0, means_weight=0, covars_prior=1e-2, covars_weight=1, algorithm="viterbi", random_state=None, n_iter=10, tol=1e-2, verbose=False, params="stmc", init_params="stmc", implementation="log"): """ Parameters ---------- n_components : int Number of states. covariance_type : {"spherical", "diag", "full", "tied"}, optional The type of covariance parameters to use: * "spherical" --- each state uses a single variance value that applies to all features (default). * "diag" --- each state uses a diagonal covariance matrix. * "full" --- each state uses a full (i.e. unrestricted) covariance matrix. * "tied" --- all states use **the same** full covariance matrix. min_covar : float, optional Floor on the diagonal of the covariance matrix to prevent overfitting. Defaults to 1e-3. startprob_prior : array, shape (n_components, ), optional Parameters of the Dirichlet prior distribution for :attr:`startprob_`. transmat_prior : array, shape (n_components, n_components), optional Parameters of the Dirichlet prior distribution for each row of the transition probabilities :attr:`transmat_`. means_prior, means_weight : array, shape (n_components, ), optional Mean and precision of the Normal prior distribtion for :attr:`means_`. covars_prior, covars_weight : array, shape (n_components, ), optional Parameters of the prior distribution for the covariance matrix :attr:`covars_`. If :attr:`covariance_type` is "spherical" or "diag" the prior is the inverse gamma distribution, otherwise --- the inverse Wishart distribution. algorithm : {"viterbi", "map"}, optional Decoder algorithm. random_state: RandomState or an int seed, optional A random number generator instance. n_iter : int, optional Maximum number of iterations to perform. tol : float, optional Convergence threshold. EM will stop if the gain in log-likelihood is below this value. verbose : bool, optional Whether per-iteration convergence reports are printed to :data:`sys.stderr`. Convergence can also be diagnosed using the :attr:`monitor_` attribute. params, init_params : string, optional The parameters that get updated during (``params``) or initialized before (``init_params``) the training. Can contain any combination of 's' for startprob, 't' for transmat, 'm' for means, and 'c' for covars. Defaults to all parameters. implementation : string, optional Determines if the forward-backward algorithm is implemented with logarithms ("log"), or using scaling ("scaling"). The default is to use logarithms for backwards compatability. 
""" super().__init__(n_components, startprob_prior=startprob_prior, transmat_prior=transmat_prior, algorithm=algorithm, random_state=random_state, n_iter=n_iter, tol=tol, params=params, verbose=verbose, init_params=init_params, implementation=implementation) self.covariance_type = covariance_type self.min_covar = min_covar self.means_prior = means_prior self.means_weight = means_weight self.covars_prior = covars_prior self.covars_weight = covars_weight @property def covars_(self): """Return covars as a full matrix.""" return fill_covars(self._covars_, self.covariance_type, self.n_components, self.n_features) @covars_.setter def covars_(self, covars): covars = np.array(covars, copy=True) _utils._validate_covars(covars, self.covariance_type, self.n_components) self._covars_ = covars def _init(self, X, lengths=None): super()._init(X, lengths) if self._needs_init("m", "means_"): kmeans = cluster.KMeans(n_clusters=self.n_components, random_state=self.random_state, n_init=1) # sklearn <1.4 backcompat. kmeans.fit(X) self.means_ = kmeans.cluster_centers_ if self._needs_init("c", "covars_"): cv = np.cov(X.T) + self.min_covar * np.eye(X.shape[1]) if not cv.shape: cv.shape = (1, 1) self.covars_ = \ _utils.distribute_covar_matrix_to_match_covariance_type( cv, self.covariance_type, self.n_components).copy() def _check(self): super()._check() self.means_ = np.asarray(self.means_) self.n_features = self.means_.shape[1] if self.covariance_type not in COVARIANCE_TYPES: raise ValueError( f"covariance_type must be one of {COVARIANCE_TYPES}") def _needs_sufficient_statistics_for_mean(self): return 'm' in self.params def _needs_sufficient_statistics_for_covars(self): return 'c' in self.params def _do_mstep(self, stats): super()._do_mstep(stats) means_prior = self.means_prior means_weight = self.means_weight # TODO: find a proper reference for estimates for different # covariance models. # Based on Huang, Acero, Hon, "Spoken Language Processing", # p. 443 - 445 denom = stats['post'][:, None] if 'm' in self.params: self.means_ = ((means_weight * means_prior + stats['obs']) / (means_weight + denom)) if 'c' in self.params: covars_prior = self.covars_prior covars_weight = self.covars_weight meandiff = self.means_ - means_prior if self.covariance_type in ('spherical', 'diag'): c_n = (means_weight * meandiff**2 + stats['obs**2'] - 2 * self.means_ * stats['obs'] + self.means_**2 * denom) c_d = max(covars_weight - 1, 0) + denom self._covars_ = (covars_prior + c_n) / np.maximum(c_d, 1e-5) if self.covariance_type == 'spherical': self._covars_ = np.tile(self._covars_.mean(1)[:, None], (1, self._covars_.shape[1])) elif self.covariance_type in ('tied', 'full'): c_n = np.empty((self.n_components, self.n_features, self.n_features)) for c in range(self.n_components): obsmean = np.outer(stats['obs'][c], self.means_[c]) c_n[c] = (means_weight * np.outer(meandiff[c], meandiff[c]) + stats['obs*obs.T'][c] - obsmean - obsmean.T + np.outer(self.means_[c], self.means_[c]) * stats['post'][c]) cvweight = max(covars_weight - self.n_features, 0) if self.covariance_type == 'tied': self._covars_ = ((covars_prior + c_n.sum(axis=0)) / (cvweight + stats['post'].sum())) elif self.covariance_type == 'full': self._covars_ = ((covars_prior + c_n) / (cvweight + stats['post'][:, None, None])) class GMMHMM(_emissions.BaseGMMHMM): """ Hidden Markov Model with Gaussian mixture emissions. Attributes ---------- monitor_ : ConvergenceMonitor Monitor object used to check the convergence of EM. 
startprob_ : array, shape (n_components, ) Initial state occupation distribution. transmat_ : array, shape (n_components, n_components) Matrix of transition probabilities between states. weights_ : array, shape (n_components, n_mix) Mixture weights for each state. means_ : array, shape (n_components, n_mix, n_features) Mean parameters for each mixture component in each state. covars_ : array Covariance parameters for each mixture components in each state. The shape depends on :attr:`covariance_type`: * (n_components, n_mix) if "spherical", * (n_components, n_mix, n_features) if "diag", * (n_components, n_mix, n_features, n_features) if "full" * (n_components, n_features, n_features) if "tied". """ def __init__(self, n_components=1, n_mix=1, min_covar=1e-3, startprob_prior=1.0, transmat_prior=1.0, weights_prior=1.0, means_prior=0.0, means_weight=0.0, covars_prior=None, covars_weight=None, algorithm="viterbi", covariance_type="diag", random_state=None, n_iter=10, tol=1e-2, verbose=False, params="stmcw", init_params="stmcw", implementation="log"): """ Parameters ---------- n_components : int Number of states in the model. n_mix : int Number of states in the GMM. covariance_type : {"sperical", "diag", "full", "tied"}, optional The type of covariance parameters to use: * "spherical" --- each state uses a single variance value that applies to all features. * "diag" --- each state uses a diagonal covariance matrix (default). * "full" --- each state uses a full (i.e. unrestricted) covariance matrix. * "tied" --- all mixture components of each state use **the same** full covariance matrix (note that this is not the same as for `GaussianHMM`). min_covar : float, optional Floor on the diagonal of the covariance matrix to prevent overfitting. Defaults to 1e-3. startprob_prior : array, shape (n_components, ), optional Parameters of the Dirichlet prior distribution for :attr:`startprob_`. transmat_prior : array, shape (n_components, n_components), optional Parameters of the Dirichlet prior distribution for each row of the transition probabilities :attr:`transmat_`. weights_prior : array, shape (n_mix, ), optional Parameters of the Dirichlet prior distribution for :attr:`weights_`. means_prior, means_weight : array, shape (n_mix, ), optional Mean and precision of the Normal prior distribtion for :attr:`means_`. covars_prior, covars_weight : array, shape (n_mix, ), optional Parameters of the prior distribution for the covariance matrix :attr:`covars_`. If :attr:`covariance_type` is "spherical" or "diag" the prior is the inverse gamma distribution, otherwise --- the inverse Wishart distribution. algorithm : {"viterbi", "map"}, optional Decoder algorithm. random_state: RandomState or an int seed, optional A random number generator instance. n_iter : int, optional Maximum number of iterations to perform. tol : float, optional Convergence threshold. EM will stop if the gain in log-likelihood is below this value. verbose : bool, optional Whether per-iteration convergence reports are printed to :data:`sys.stderr`. Convergence can also be diagnosed using the :attr:`monitor_` attribute. params, init_params : string, optional The parameters that get updated during (``params``) or initialized before (``init_params``) the training. Can contain any combination of 's' for startprob, 't' for transmat, 'm' for means, 'c' for covars, and 'w' for GMM mixing weights. Defaults to all parameters. 
implementation : string, optional Determines if the forward-backward algorithm is implemented with logarithms ("log"), or using scaling ("scaling"). The default is to use logarithms for backwards compatability. """ BaseHMM.__init__(self, n_components, startprob_prior=startprob_prior, transmat_prior=transmat_prior, algorithm=algorithm, random_state=random_state, n_iter=n_iter, tol=tol, verbose=verbose, params=params, init_params=init_params, implementation=implementation) self.covariance_type = covariance_type self.min_covar = min_covar self.n_mix = n_mix self.weights_prior = weights_prior self.means_prior = means_prior self.means_weight = means_weight self.covars_prior = covars_prior self.covars_weight = covars_weight def _init(self, X, lengths=None): super()._init(X, lengths=None) nc = self.n_components nf = self.n_features nm = self.n_mix def compute_cv(): return np.cov(X.T) + self.min_covar * np.eye(nf) # Default values for covariance prior parameters self._init_covar_priors() self._fix_priors_shape() main_kmeans = cluster.KMeans(n_clusters=nc, random_state=self.random_state, n_init=10) # sklearn >=1.2 compat. cv = None # covariance matrix labels = main_kmeans.fit_predict(X) main_centroid = np.mean(main_kmeans.cluster_centers_, axis=0) means = [] for label in range(nc): kmeans = cluster.KMeans(n_clusters=nm, random_state=self.random_state, n_init=10) # sklearn >=1.2 compat. X_cluster = X[np.where(labels == label)] if X_cluster.shape[0] >= nm: kmeans.fit(X_cluster) means.append(kmeans.cluster_centers_) else: if cv is None: cv = compute_cv() m_cluster = np.random.multivariate_normal(main_centroid, cov=cv, size=nm) means.append(m_cluster) if self._needs_init("w", "weights_"): self.weights_ = np.full((nc, nm), 1 / nm) if self._needs_init("m", "means_"): self.means_ = np.stack(means) if self._needs_init("c", "covars_"): if cv is None: cv = compute_cv() if not cv.shape: cv.shape = (1, 1) if self.covariance_type == 'tied': self.covars_ = np.zeros((nc, nf, nf)) self.covars_[:] = cv elif self.covariance_type == 'full': self.covars_ = np.zeros((nc, nm, nf, nf)) self.covars_[:] = cv elif self.covariance_type == 'diag': self.covars_ = np.zeros((nc, nm, nf)) self.covars_[:] = np.diag(cv) elif self.covariance_type == 'spherical': self.covars_ = np.zeros((nc, nm)) self.covars_[:] = cv.mean() def _init_covar_priors(self): if self.covariance_type == "full": if self.covars_prior is None: self.covars_prior = 0.0 if self.covars_weight is None: self.covars_weight = -(1.0 + self.n_features + 1.0) elif self.covariance_type == "tied": if self.covars_prior is None: self.covars_prior = 0.0 if self.covars_weight is None: self.covars_weight = -(self.n_mix + self.n_features + 1.0) elif self.covariance_type == "diag": if self.covars_prior is None: self.covars_prior = -1.5 if self.covars_weight is None: self.covars_weight = 0.0 elif self.covariance_type == "spherical": if self.covars_prior is None: self.covars_prior = -(self.n_mix + 2.0) / 2.0 if self.covars_weight is None: self.covars_weight = 0.0 def _fix_priors_shape(self): nc = self.n_components nf = self.n_features nm = self.n_mix # If priors are numbers, this function will make them into a # matrix of proper shape self.weights_prior = np.broadcast_to( self.weights_prior, (nc, nm)).copy() self.means_prior = np.broadcast_to( self.means_prior, (nc, nm, nf)).copy() self.means_weight = np.broadcast_to( self.means_weight, (nc, nm)).copy() if self.covariance_type == "full": self.covars_prior = np.broadcast_to( self.covars_prior, (nc, nm, nf, nf)).copy() 
self.covars_weight = np.broadcast_to( self.covars_weight, (nc, nm)).copy() elif self.covariance_type == "tied": self.covars_prior = np.broadcast_to( self.covars_prior, (nc, nf, nf)).copy() self.covars_weight = np.broadcast_to( self.covars_weight, nc).copy() elif self.covariance_type == "diag": self.covars_prior = np.broadcast_to( self.covars_prior, (nc, nm, nf)).copy() self.covars_weight = np.broadcast_to( self.covars_weight, (nc, nm, nf)).copy() elif self.covariance_type == "spherical": self.covars_prior = np.broadcast_to( self.covars_prior, (nc, nm)).copy() self.covars_weight = np.broadcast_to( self.covars_weight, (nc, nm)).copy() def _check(self): super()._check() if not hasattr(self, "n_features"): self.n_features = self.means_.shape[2] nc = self.n_components nf = self.n_features nm = self.n_mix self._init_covar_priors() self._fix_priors_shape() # Checking covariance type if self.covariance_type not in COVARIANCE_TYPES: raise ValueError( f"covariance_type must be one of {COVARIANCE_TYPES}") self.weights_ = np.array(self.weights_) # Checking mixture weights' shape if self.weights_.shape != (nc, nm): raise ValueError( f"weights_ must have shape (n_components, n_mix), " f"actual shape: {self.weights_.shape}") # Checking mixture weights' mathematical correctness self._check_sum_1("weights_") # Checking means' shape self.means_ = np.array(self.means_) if self.means_.shape != (nc, nm, nf): raise ValueError( f"means_ must have shape (n_components, n_mix, n_features), " f"actual shape: {self.means_.shape}") # Checking covariances' shape self.covars_ = np.array(self.covars_) covars_shape = self.covars_.shape needed_shapes = { "spherical": (nc, nm), "tied": (nc, nf, nf), "diag": (nc, nm, nf), "full": (nc, nm, nf, nf), } needed_shape = needed_shapes[self.covariance_type] if covars_shape != needed_shape: raise ValueError( f"{self.covariance_type!r} mixture covars must have shape " f"{needed_shape}, actual shape: {covars_shape}") # Checking covariances' mathematical correctness if (self.covariance_type == "spherical" or self.covariance_type == "diag"): if np.any(self.covars_ < 0): raise ValueError(f"{self.covariance_type!r} mixture covars " f"must be non-negative") if np.any(self.covars_ == 0): _log.warning("Degenerate mixture covariance") elif self.covariance_type == "tied": for i, covar in enumerate(self.covars_): if not np.allclose(covar, covar.T): raise ValueError( f"Covariance of state #{i} is not symmetric") min_eigvalsh = linalg.eigvalsh(covar).min() if min_eigvalsh < 0: raise ValueError( f"Covariance of state #{i} is not positive definite") if min_eigvalsh == 0: _log.warning("Covariance of state #%d has a null " "eigenvalue.", i) elif self.covariance_type == "full": for i, mix_covars in enumerate(self.covars_): for j, covar in enumerate(mix_covars): if not np.allclose(covar, covar.T): raise ValueError( f"Covariance of state #{i}, mixture #{j} is not " f"symmetric") min_eigvalsh = linalg.eigvalsh(covar).min() if min_eigvalsh < 0: raise ValueError( f"Covariance of state #{i}, mixture #{j} is not " f"positive definite") if min_eigvalsh == 0: _log.warning("Covariance of state #%d, mixture #%d " "has a null eigenvalue.", i, j) def _do_mstep(self, stats): super()._do_mstep(stats) nf = self.n_features nm = self.n_mix # Maximizing weights if 'w' in self.params: alphas_minus_one = self.weights_prior - 1 w_n = stats['post_mix_sum'] + alphas_minus_one w_d = (stats['post_sum'] + alphas_minus_one.sum(axis=1))[:, None] self.weights_ = w_n / w_d # Maximizing means if 'm' in self.params: m_n = stats['m_n'] 
m_d = stats['post_mix_sum'] + self.means_weight # If a componenent has zero weight, then replace nan (0/0?) means # by 0 (0/1). The actual value is irrelevant as the component will # be unused. This needs to be done before maximizing covariances # as nans would otherwise propagate to other components if # covariances are tied. m_d[(self.weights_ == 0) & (m_n == 0).all(axis=-1)] = 1 self.means_ = m_n / m_d[:, :, None] # Maximizing covariances if 'c' in self.params: lambdas, mus = self.means_weight, self.means_prior centered_means = self.means_ - mus def outer_f(x): # Outer product over features. return x[..., :, None] * x[..., None, :] if self.covariance_type == 'full': centered_means_dots = outer_f(centered_means) psis_t = np.transpose(self.covars_prior, axes=(0, 1, 3, 2)) nus = self.covars_weight c_n = psis_t + lambdas[:, :, None, None] * centered_means_dots c_n += stats['c_n'] c_d = ( stats['post_mix_sum'] + 1 + nus + nf + 1 )[:, :, None, None] elif self.covariance_type == 'diag': alphas = self.covars_prior betas = self.covars_weight centered_means2 = centered_means ** 2 c_n = lambdas[:, :, None] * centered_means2 + 2 * betas c_n += stats['c_n'] c_d = stats['post_mix_sum'][:, :, None] + 1 + 2 * (alphas + 1) elif self.covariance_type == 'spherical': centered_means_norm2 = np.einsum( # Faster than (x**2).sum(-1) '...i,...i', centered_means, centered_means) alphas = self.covars_prior betas = self.covars_weight c_n = lambdas * centered_means_norm2 + 2 * betas c_n += stats['c_n'] c_d = nf * (stats['post_mix_sum'] + 1) + 2 * (alphas + 1) elif self.covariance_type == 'tied': centered_means_dots = outer_f(centered_means) psis_t = np.transpose(self.covars_prior, axes=(0, 2, 1)) nus = self.covars_weight c_n = np.einsum('ij,ijkl->ikl', lambdas, centered_means_dots) + psis_t c_n += stats['c_n'] c_d = (stats['post_sum'] + nm + nus + nf + 1)[:, None, None] self.covars_ = c_n / c_d class MultinomialHMM(_emissions.BaseMultinomialHMM): """ Hidden Markov Model with multinomial emissions. Attributes ---------- n_features : int Number of possible symbols emitted by the model (in the samples). monitor_ : ConvergenceMonitor Monitor object used to check the convergence of EM. startprob_ : array, shape (n_components, ) Initial state occupation distribution. transmat_ : array, shape (n_components, n_components) Matrix of transition probabilities between states. emissionprob_ : array, shape (n_components, n_features) Probability of emitting a given symbol when in each state. Examples -------- >>> from hmmlearn.hmm import MultinomialHMM """ def __init__(self, n_components=1, n_trials=None, startprob_prior=1.0, transmat_prior=1.0, algorithm="viterbi", random_state=None, n_iter=10, tol=1e-2, verbose=False, params="ste", init_params="ste", implementation="log"): """ Parameters ---------- n_components : int Number of states. n_trials : int or array of int Number of trials (when sampling, all samples must have the same :attr:`n_trials`). startprob_prior : array, shape (n_components, ), optional Parameters of the Dirichlet prior distribution for :attr:`startprob_`. transmat_prior : array, shape (n_components, n_components), optional Parameters of the Dirichlet prior distribution for each row of the transition probabilities :attr:`transmat_`. algorithm : {"viterbi", "map"}, optional Decoder algorithm. random_state: RandomState or an int seed, optional A random number generator instance. n_iter : int, optional Maximum number of iterations to perform. tol : float, optional Convergence threshold. 
EM will stop if the gain in log-likelihood is below this value. verbose : bool, optional Whether per-iteration convergence reports are printed to :data:`sys.stderr`. Convergence can also be diagnosed using the :attr:`monitor_` attribute. params, init_params : string, optional The parameters that get updated during (``params``) or initialized before (``init_params``) the training. Can contain any combination of 's' for startprob, 't' for transmat, and 'e' for emissionprob. Defaults to all parameters. implementation : string, optional Determines if the forward-backward algorithm is implemented with logarithms ("log"), or using scaling ("scaling"). The default is to use logarithms for backwards compatability. """ BaseHMM.__init__(self, n_components, startprob_prior=startprob_prior, transmat_prior=transmat_prior, algorithm=algorithm, random_state=random_state, n_iter=n_iter, tol=tol, verbose=verbose, params=params, init_params=init_params, implementation=implementation) self.n_trials = n_trials _log.warning( "MultinomialHMM has undergone major changes. " "The previous version was implementing a CategoricalHMM " "(a special case of MultinomialHMM). " "This new implementation follows the standard definition for " "a Multinomial distribution (e.g. as in " "https://en.wikipedia.org/wiki/Multinomial_distribution). " "See these issues for details:\n" "https://github.com/hmmlearn/hmmlearn/issues/335\n" "https://github.com/hmmlearn/hmmlearn/issues/340") def _init(self, X, lengths=None): super()._init(X, lengths=None) self.random_state = check_random_state(self.random_state) if 'e' in self.init_params: self.emissionprob_ = self.random_state \ .rand(self.n_components, self.n_features) normalize(self.emissionprob_, axis=1) def _check(self): super()._check() self.emissionprob_ = np.atleast_2d(self.emissionprob_) n_features = getattr(self, "n_features", self.emissionprob_.shape[1]) if self.emissionprob_.shape != (self.n_components, n_features): raise ValueError( "emissionprob_ must have shape (n_components, n_features)") else: self.n_features = n_features if self.n_trials is None: raise ValueError("n_trials must be set") def _do_mstep(self, stats): super()._do_mstep(stats) if 'e' in self.params: self.emissionprob_ = ( stats['obs'] / stats['obs'].sum(axis=1, keepdims=True)) class PoissonHMM(_emissions.BasePoissonHMM): """ Hidden Markov Model with Poisson emissions. Attributes ---------- monitor_ : ConvergenceMonitor Monitor object used to check the convergence of EM. startprob_ : array, shape (n_components, ) Initial state occupation distribution. transmat_ : array, shape (n_components, n_components) Matrix of transition probabilities between states. lambdas_ : array, shape (n_components, n_features) The expectation value of the waiting time parameters for each feature in a given state. """ def __init__(self, n_components=1, startprob_prior=1.0, transmat_prior=1.0, lambdas_prior=0.0, lambdas_weight=0.0, algorithm="viterbi", random_state=None, n_iter=10, tol=1e-2, verbose=False, params="stl", init_params="stl", implementation="log"): """ Parameters ---------- n_components : int Number of states. startprob_prior : array, shape (n_components, ), optional Parameters of the Dirichlet prior distribution for :attr:`startprob_`. transmat_prior : array, shape (n_components, n_components), optional Parameters of the Dirichlet prior distribution for each row of the transition probabilities :attr:`transmat_`. 
lambdas_prior, lambdas_weight : array, shape (n_components,), optional The gamma prior on the lambda values using alpha-beta notation, respectivley. If None, will be set based on the method of moments. algorithm : {"viterbi", "map"}, optional Decoder algorithm. random_state: RandomState or an int seed, optional A random number generator instance. n_iter : int, optional Maximum number of iterations to perform. tol : float, optional Convergence threshold. EM will stop if the gain in log-likelihood is below this value. verbose : bool, optional Whether per-iteration convergence reports are printed to :data:`sys.stderr`. Convergence can also be diagnosed using the :attr:`monitor_` attribute. params, init_params : string, optional The parameters that get updated during (``params``) or initialized before (``init_params``) the training. Can contain any combination of 's' for startprob, 't' for transmat, and 'l' for lambdas. Defaults to all parameters. implementation : string, optional Determines if the forward-backward algorithm is implemented with logarithms ("log"), or using scaling ("scaling"). The default is to use logarithms for backwards compatability. """ BaseHMM.__init__(self, n_components, startprob_prior=startprob_prior, transmat_prior=transmat_prior, algorithm=algorithm, random_state=random_state, n_iter=n_iter, tol=tol, verbose=verbose, params=params, init_params=init_params, implementation=implementation) self.lambdas_prior = lambdas_prior self.lambdas_weight = lambdas_weight def _init(self, X, lengths=None): super()._init(X, lengths) self.random_state = check_random_state(self.random_state) mean_X = X.mean() var_X = X.var() if self._needs_init("l", "lambdas_"): # initialize with method of moments based on X self.lambdas_ = self.random_state.gamma( shape=mean_X**2 / var_X, scale=var_X / mean_X, # numpy uses theta = 1 / beta size=(self.n_components, self.n_features)) def _check(self): super()._check() self.lambdas_ = np.atleast_2d(self.lambdas_) n_features = getattr(self, "n_features", self.lambdas_.shape[1]) if self.lambdas_.shape != (self.n_components, n_features): raise ValueError( "lambdas_ must have shape (n_components, n_features)") self.n_features = n_features def _do_mstep(self, stats): super()._do_mstep(stats) if 'l' in self.params: # Based on: Hyvönen & Tolonen, "Bayesian Inference 2019" # section 3.2 # https://vioshyvo.github.io/Bayesian_inference alphas, betas = self.lambdas_prior, self.lambdas_weight n = stats['post'].sum() y_bar = stats['obs'] / stats['post'][:, None] # the same as kappa notation (more intuitive) but avoids divide by # 0, where: # kappas = betas / (betas + n) # self.lambdas_ = kappas * (alphas / betas) + (1 - kappas) * y_bar self.lambdas_ = (alphas + n * y_bar) / (betas + n) hmmlearn-0.3.0/lib/hmmlearn/stats.py000066400000000000000000000076121441755530000174100ustar00rootroot00000000000000import numpy as np from scipy import linalg def log_multivariate_normal_density(X, means, covars, covariance_type='diag'): """ Compute the log probability under a multivariate Gaussian distribution. Parameters ---------- X : array_like, shape (n_samples, n_features) List of n_features-dimensional data points. Each row corresponds to a single data point. means : array_like, shape (n_components, n_features) List of n_features-dimensional mean vectors for n_components Gaussians. Each row corresponds to a single mean vector. covars : array_like List of n_components covariance parameters for each Gaussian. 
The shape depends on `covariance_type`: * (n_components, ) if "spherical", * (n_components, n_features) if "diag", * (n_components, n_features, n_features) if "full", * (n_features, n_features) if "tied". covariance_type : {"spherical", "diag", "full", "tied"}, optional The type of the covariance parameters. Defaults to 'diag'. Returns ------- lpr : array_like, shape (n_samples, n_components) Array containing the log probabilities of each data point in X under each of the n_components multivariate Gaussian distributions. """ log_multivariate_normal_density_dict = { 'spherical': _log_multivariate_normal_density_spherical, 'tied': _log_multivariate_normal_density_tied, 'diag': _log_multivariate_normal_density_diag, 'full': _log_multivariate_normal_density_full} return log_multivariate_normal_density_dict[covariance_type]( X, means, covars ) def _log_multivariate_normal_density_diag(X, means, covars): """Compute Gaussian log-density at X for a diagonal model.""" # X: (ns, nf); means: (nc, nf); covars: (nc, nf) -> (ns, nc) nc, nf = means.shape # Avoid 0 log 0 = nan in degenerate covariance case. covars = np.maximum(covars, np.finfo(float).tiny) with np.errstate(over="ignore"): return -0.5 * (nf * np.log(2 * np.pi) + np.log(covars).sum(axis=-1) + ((X[:, None, :] - means) ** 2 / covars).sum(axis=-1)) def _log_multivariate_normal_density_spherical(X, means, covars): """Compute Gaussian log-density at X for a spherical model.""" nc, nf = means.shape if covars.ndim == 1: covars = covars[:, np.newaxis] covars = np.broadcast_to(covars, (nc, nf)) return _log_multivariate_normal_density_diag(X, means, covars) def _log_multivariate_normal_density_tied(X, means, covars): """Compute Gaussian log-density at X for a tied model.""" nc, nf = means.shape cv = np.broadcast_to(covars, (nc, nf, nf)) return _log_multivariate_normal_density_full(X, means, cv) def _log_multivariate_normal_density_full(X, means, covars, min_covar=1.e-7): """Log probability for full covariance matrices.""" nc, nf = means.shape log_prob = [] for c, (mu, cv) in enumerate(zip(means, covars)): try: cv_chol = linalg.cholesky(cv, lower=True) except linalg.LinAlgError: # The model is most probably stuck in a component with too # few observations, we need to reinitialize this components try: cv_chol = linalg.cholesky(cv + min_covar * np.eye(nf), lower=True) except linalg.LinAlgError: raise ValueError("'covars' must be symmetric, " "positive-definite") cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol))) cv_sol = linalg.solve_triangular(cv_chol, (X - mu).T, lower=True).T log_prob.append(-.5 * (nf * np.log(2 * np.pi) + (cv_sol ** 2).sum(axis=1) + cv_log_det)) return np.transpose(log_prob) hmmlearn-0.3.0/lib/hmmlearn/tests/000077500000000000000000000000001441755530000170345ustar00rootroot00000000000000hmmlearn-0.3.0/lib/hmmlearn/tests/__init__.py000066400000000000000000000073171441755530000211550ustar00rootroot00000000000000import numpy as np import pytest from sklearn.datasets import make_spd_matrix from sklearn.utils import check_random_state from hmmlearn.utils import normalize from hmmlearn.base import DECODER_ALGORITHMS # Make NumPy complain about underflows/overflows etc. 
np.seterr(all="warn") def make_covar_matrix(covariance_type, n_components, n_features, random_state=None): mincv = 0.1 prng = check_random_state(random_state) if covariance_type == 'spherical': return (mincv + mincv * prng.random_sample((n_components,))) ** 2 elif covariance_type == 'tied': return (make_spd_matrix(n_features) + mincv * np.eye(n_features)) elif covariance_type == 'diag': return (mincv + mincv * prng.random_sample((n_components, n_features))) ** 2 elif covariance_type == 'full': return np.array([ (make_spd_matrix(n_features, random_state=prng) + mincv * np.eye(n_features)) for x in range(n_components) ]) def normalized(X, axis=None): X_copy = X.copy() normalize(X_copy, axis=axis) return X_copy def assert_log_likelihood_increasing(h, X, lengths, n_iter): h.n_iter = 1 # make sure we do a single iteration at a time h.init_params = '' # and don't re-init params log_likelihoods = np.empty(n_iter, dtype=float) for i in range(n_iter): h.fit(X, lengths=lengths) log_likelihoods[i] = h.score(X, lengths=lengths) # XXX the rounding is necessary because LL can oscillate in the # fractional part, failing the tests. diff = np.diff(log_likelihoods) value = np.finfo(float).eps ** (1/2) assert diff.max() > value, f"Non-increasing log-likelihoods:\n" \ f"lls={log_likelihoods}\n" \ f"diff={diff}\n" \ f"diff.max() < value={diff.min() < value}\n" \ f"np.finfo(float).eps={value}\n" def compare_variational_and_em_models(variational, em, sequences, lengths): em_score = em.score(sequences, lengths) vi_score = variational.score(sequences, lengths) em_scores = em.predict(sequences, lengths) vi_scores = variational.predict(sequences, lengths) assert em_score == pytest.approx(vi_score), (em_score, vi_score) assert np.all(em_scores == vi_scores) for decode_algo in DECODER_ALGORITHMS: em_logprob, em_path = em.decode(sequences, lengths, algorithm=decode_algo) vi_logprob, vi_path = variational.decode(sequences, lengths, algorithm=decode_algo) assert em_logprob == pytest.approx(vi_logprob), decode_algo assert np.all(em_path == vi_path), decode_algo em_predict = em.predict(sequences, lengths) vi_predict = variational.predict(sequences, lengths) assert np.all(em_predict == vi_predict) em_logprob, em_posteriors = em.score_samples(sequences, lengths) vi_logprob, vi_posteriors = variational.score_samples(sequences, lengths) assert em_logprob == pytest.approx(vi_logprob) assert np.all(em_posteriors == pytest.approx(vi_posteriors)) em_obs, em_states = em.sample(100, random_state=42) vi_obs, vi_states = variational.sample(100, random_state=42) assert np.all(em_obs == vi_obs) assert np.all(em_states == vi_states) def vi_uniform_startprob_and_transmat(model, lengths): nc = model.n_components model.startprob_prior_ = np.full(nc, 1/nc) model.startprob_posterior_ = np.full(nc, 1/nc) * len(lengths) model.transmat_prior_ = np.full((nc, nc), 1/nc) model.transmat_posterior_ = np.full((nc, nc), 1/nc)*sum(lengths) return model hmmlearn-0.3.0/lib/hmmlearn/tests/conftest.py000066400000000000000000000004711441755530000212350ustar00rootroot00000000000000import os import random import numpy as np def pytest_configure(config): _random_seed = int(os.environ.get("HMMLEARN_SEED", np.random.uniform() * (2**31 - 1))) print(f"set RNG seed to {_random_seed}") np.random.seed(_random_seed) random.seed(_random_seed) hmmlearn-0.3.0/lib/hmmlearn/tests/test_base.py000066400000000000000000000222251441755530000213620ustar00rootroot00000000000000import numpy as np from numpy.testing import assert_allclose import pytest from scipy import special from 
hmmlearn.base import BaseHMM, ConvergenceMonitor from hmmlearn import _hmmc class TestMonitor: def test_converged_by_iterations(self): m = ConvergenceMonitor(tol=1e-3, n_iter=2, verbose=False) assert not m.converged m.report(-0.01) assert not m.converged m.report(-0.1) assert m.converged def test_converged_by_log_prob(self): m = ConvergenceMonitor(tol=1e-3, n_iter=10, verbose=False) for log_prob in [-0.03, -0.02, -0.01]: m.report(log_prob) assert not m.converged m.report(-0.0101) assert m.converged def test_reset(self): m = ConvergenceMonitor(tol=1e-3, n_iter=10, verbose=False) m.iter = 1 m.history.append(-0.01) m._reset() assert m.iter == 0 assert not m.history def test_report_first_iteration(self, capsys): m = ConvergenceMonitor(tol=1e-3, n_iter=10, verbose=True) m.report(-0.01) out, err = capsys.readouterr() assert not out expected = m._template.format(iter=1, log_prob=-0.01, delta=np.nan) assert err.splitlines() == [expected] def test_report(self, capsys): n_iter = 10 m = ConvergenceMonitor(tol=1e-3, n_iter=n_iter, verbose=True) for i in reversed(range(n_iter)): m.report(-0.01 * i) out, err = capsys.readouterr() assert not out assert len(err.splitlines()) == n_iter assert len(m.history) == n_iter class StubHMM(BaseHMM): """An HMM with hardcoded observation probabilities.""" def _compute_log_likelihood(self, X): return self.log_frameprob class TestBaseAgainstWikipedia: def setup_method(self, method): # Example from http://en.wikipedia.org/wiki/Forward-backward_algorithm self.frameprob = np.asarray([[0.9, 0.2], [0.9, 0.2], [0.1, 0.8], [0.9, 0.2], [0.9, 0.2]]) self.log_frameprob = np.log(self.frameprob) h = StubHMM(2) h.transmat_ = [[0.7, 0.3], [0.3, 0.7]] h.startprob_ = [0.5, 0.5] h.log_frameprob = self.log_frameprob h.frameprob = self.frameprob self.hmm = h def test_do_forward_scaling_pass(self): log_prob, fwdlattice, scaling_factors = _hmmc.forward_scaling( self.hmm.startprob_, self.hmm.transmat_, self.frameprob) ref_log_prob = -3.3725 assert round(log_prob, 4) == ref_log_prob reffwdlattice = np.exp([[0.4500, 0.1000], [0.3105, 0.0410], [0.0230, 0.0975], [0.0408, 0.0150], [0.0298, 0.0046]]) assert_allclose(np.exp(fwdlattice), reffwdlattice, 4) def test_do_forward_pass(self): log_prob, fwdlattice = _hmmc.forward_log( self.hmm.startprob_, self.hmm.transmat_, self.log_frameprob) ref_log_prob = -3.3725 assert round(log_prob, 4) == ref_log_prob reffwdlattice = np.array([[0.4500, 0.1000], [0.3105, 0.0410], [0.0230, 0.0975], [0.0408, 0.0150], [0.0298, 0.0046]]) assert_allclose(np.exp(fwdlattice), reffwdlattice, 4) def test_do_backward_scaling_pass(self): log_prob, fwdlattice, scaling_factors = _hmmc.forward_scaling( self.hmm.startprob_, self.hmm.transmat_, self.frameprob) bwdlattice = _hmmc.backward_scaling(self.hmm.startprob_, self.hmm.transmat_, self.frameprob, scaling_factors) refbwdlattice = np.array([[0.0661, 0.0455], [0.0906, 0.1503], [0.4593, 0.2437], [0.6900, 0.4100], [1.0000, 1.0000]]) scaling_factors = np.cumprod(scaling_factors[::-1])[::-1] bwdlattice_scaled = bwdlattice / scaling_factors[:, None] # Answer will be equivalent when the scaling factor is accounted for assert_allclose(bwdlattice_scaled, refbwdlattice, 4) def test_do_backward_log_pass(self): bwdlattice = _hmmc.backward_log( self.hmm.startprob_, self.hmm.transmat_, self.log_frameprob) refbwdlattice = np.array([[0.0661, 0.0455], [0.0906, 0.1503], [0.4593, 0.2437], [0.6900, 0.4100], [1.0000, 1.0000]]) assert_allclose(np.exp(bwdlattice), refbwdlattice, 4) def test_do_viterbi_pass(self): log_prob, state_sequence = 
_hmmc.viterbi( self.hmm.startprob_, self.hmm.transmat_, self.log_frameprob) refstate_sequence = [0, 0, 1, 0, 0] assert_allclose(state_sequence, refstate_sequence) ref_log_prob = -4.4590 assert round(log_prob, 4) == ref_log_prob def test_score_samples(self): # ``StubHMM` ignores the values in ```X``, so we just pass in an # array of the appropriate shape. log_prob, posteriors = self.hmm.score_samples(self.log_frameprob) assert_allclose(posteriors.sum(axis=1), np.ones(len(posteriors))) ref_log_prob = -3.3725 assert round(log_prob, 4) == ref_log_prob refposteriors = np.array([[0.8673, 0.1327], [0.8204, 0.1796], [0.3075, 0.6925], [0.8204, 0.1796], [0.8673, 0.1327]]) assert_allclose(posteriors, refposteriors, atol=1e-4) def test_generate_samples(self): X0, Z0 = self.hmm.sample(n_samples=10) X, Z = self.hmm.sample(n_samples=10, currstate=Z0[-1]) assert len(Z0) == len(Z) == 10 and Z[0] == Z0[-1] class TestBaseConsistentWithGMM: def setup_method(self, method): n_components = 8 n_samples = 10 self.log_frameprob = np.log( np.random.random((n_samples, n_components))) h = StubHMM(n_components) h.log_frameprob = self.log_frameprob # If startprob and transmat are uniform across all states (the # default), the transitions are uninformative - the model # reduces to a GMM with uniform mixing weights (in terms of # posteriors, not likelihoods). h.startprob_ = np.ones(n_components) / n_components h.transmat_ = np.ones((n_components, n_components)) / n_components self.hmm = h def test_score_samples(self): log_prob, hmmposteriors = self.hmm.score_samples(self.log_frameprob) n_samples, n_components = self.log_frameprob.shape assert_allclose(hmmposteriors.sum(axis=1), np.ones(n_samples)) norm = special.logsumexp(self.log_frameprob, axis=1)[:, np.newaxis] gmmposteriors = np.exp(self.log_frameprob - np.tile(norm, (1, n_components))) assert_allclose(hmmposteriors, gmmposteriors) def test_decode(self): _log_prob, state_sequence = self.hmm.decode(self.log_frameprob) n_samples, n_components = self.log_frameprob.shape norm = special.logsumexp(self.log_frameprob, axis=1)[:, np.newaxis] gmmposteriors = np.exp(self.log_frameprob - np.tile(norm, (1, n_components))) gmmstate_sequence = gmmposteriors.argmax(axis=1) assert_allclose(state_sequence, gmmstate_sequence) def test_base_hmm_attributes(): n_components = 20 startprob = np.random.random(n_components) startprob /= startprob.sum() transmat = np.random.random((n_components, n_components)) transmat /= np.tile(transmat.sum(axis=1)[:, np.newaxis], (1, n_components)) h = StubHMM(n_components) assert h.n_components == n_components h.startprob_ = startprob assert_allclose(h.startprob_, startprob) with pytest.raises(ValueError): h.startprob_ = 2 * startprob h._check() with pytest.raises(ValueError): h.startprob_ = [] h._check() with pytest.raises(ValueError): h.startprob_ = np.zeros((n_components - 2, 2)) h._check() h.startprob_ = startprob h.transmat_ = transmat assert_allclose(h.transmat_, transmat) with pytest.raises(ValueError): h.transmat_ = 2 * transmat h._check() with pytest.raises(ValueError): h.transmat_ = [] h._check() with pytest.raises(ValueError): h.transmat_ = np.zeros((n_components - 2, n_components)) h._check() def test_stationary_distribution(): n_components = 10 h = StubHMM(n_components) transmat = np.random.random((n_components, n_components)) transmat /= np.tile(transmat.sum(axis=1)[:, np.newaxis], (1, n_components)) h.transmat_ = transmat stationary = h.get_stationary_distribution() assert stationary.dtype == float assert (h.get_stationary_distribution().T @ 
h.transmat_ == pytest.approx(stationary)) hmmlearn-0.3.0/lib/hmmlearn/tests/test_categorical_hmm.py000066400000000000000000000146451441755530000235750ustar00rootroot00000000000000import numpy as np from numpy.testing import assert_allclose import pytest from hmmlearn import hmm from . import assert_log_likelihood_increasing, normalized class TestCategoricalAgainstWikipedia: """ Examples from Wikipedia: - http://en.wikipedia.org/wiki/Hidden_Markov_model - http://en.wikipedia.org/wiki/Viterbi_algorithm """ def new_hmm(self, impl): n_components = 2 # ['Rainy', 'Sunny'] n_features = 3 # ['walk', 'shop', 'clean'] h = hmm.CategoricalHMM(n_components, implementation=impl) h.n_features = n_features h.startprob_ = np.array([0.6, 0.4]) h.transmat_ = np.array([[0.7, 0.3], [0.4, 0.6]]) h.emissionprob_ = np.array([[0.1, 0.4, 0.5], [0.6, 0.3, 0.1]]) return h @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_decode_viterbi(self, implementation): # From http://en.wikipedia.org/wiki/Viterbi_algorithm: # "This reveals that the observations ['walk', 'shop', 'clean'] # were most likely generated by states ['Sunny', 'Rainy', 'Rainy'], # with probability 0.01344." h = self.new_hmm(implementation) X = [[0], [1], [2]] log_prob, state_sequence = h.decode(X, algorithm="viterbi") assert round(np.exp(log_prob), 5) == 0.01344 assert_allclose(state_sequence, [1, 0, 0]) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_decode_map(self, implementation): X = [[0], [1], [2]] h = self.new_hmm(implementation) _log_prob, state_sequence = h.decode(X, algorithm="map") assert_allclose(state_sequence, [1, 0, 0]) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_predict(self, implementation): X = [[0], [1], [2]] h = self.new_hmm(implementation) state_sequence = h.predict(X) posteriors = h.predict_proba(X) assert_allclose(state_sequence, [1, 0, 0]) assert_allclose(posteriors, [ [0.23170303, 0.76829697], [0.62406281, 0.37593719], [0.86397706, 0.13602294], ], rtol=0, atol=1e-6) class TestCategoricalHMM: n_components = 2 n_features = 3 def new_hmm(self, impl): h = hmm.CategoricalHMM(self.n_components, implementation=impl) h.startprob_ = np.array([0.6, 0.4]) h.transmat_ = np.array([[0.7, 0.3], [0.4, 0.6]]) h.emissionprob_ = np.array([[0.1, 0.4, 0.5], [0.6, 0.3, 0.1]]) return h @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_n_features(self, implementation): sequences, _ = self.new_hmm(implementation).sample(500) # set n_features model = hmm.CategoricalHMM( n_components=2, implementation=implementation) assert_log_likelihood_increasing(model, sequences, [500], 10) assert model.n_features == 3 # Respect n_features model = hmm.CategoricalHMM( n_components=2, implementation=implementation, n_features=5) assert_log_likelihood_increasing(model, sequences, [500], 10) assert model.n_features == 5 @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_attributes(self, implementation): with pytest.raises(ValueError): h = self.new_hmm(implementation) h.emissionprob_ = [] h._check() with pytest.raises(ValueError): h.emissionprob_ = np.zeros((self.n_components - 2, self.n_features)) h._check() @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_score_samples(self, implementation): idx = np.repeat(np.arange(self.n_components), 10) n_samples = len(idx) X = np.random.randint(self.n_features, size=(n_samples, 1)) h = self.new_hmm(implementation) ll, posteriors = h.score_samples(X) assert posteriors.shape == (n_samples, 
            self.n_components)
        assert_allclose(posteriors.sum(axis=1), np.ones(n_samples))

    @pytest.mark.parametrize("implementation", ["scaling", "log"])
    def test_sample(self, implementation, n_samples=1000):
        h = self.new_hmm(implementation)
        X, state_sequence = h.sample(n_samples)
        assert X.ndim == 2
        assert len(X) == len(state_sequence) == n_samples
        assert len(np.unique(X)) == self.n_features

    @pytest.mark.parametrize("implementation", ["scaling", "log"])
    def test_fit(self, implementation, params='ste', n_iter=5):
        h = self.new_hmm(implementation)
        h.params = params
        lengths = np.array([10] * 10)
        X, _state_sequence = h.sample(lengths.sum())
        # Mess up the parameters and see if we can re-learn them.
        h.startprob_ = normalized(np.random.random(self.n_components))
        h.transmat_ = normalized(
            np.random.random((self.n_components, self.n_components)), axis=1)
        h.emissionprob_ = normalized(
            np.random.random((self.n_components, self.n_features)), axis=1)
        assert_log_likelihood_increasing(h, X, lengths, n_iter)

    @pytest.mark.parametrize("implementation", ["scaling", "log"])
    def test_fit_emissionprob(self, implementation):
        self.test_fit(implementation, 'e')

    @pytest.mark.parametrize("implementation", ["scaling", "log"])
    def test_fit_with_init(self, implementation, params='ste', n_iter=5):
        lengths = [10] * 10
        h = self.new_hmm(implementation)
        X, _state_sequence = h.sample(sum(lengths))
        # use init_function to initialize parameters
        h = hmm.CategoricalHMM(self.n_components, params=params,
                               init_params=params)
        h._init(X, lengths)
        assert_log_likelihood_increasing(h, X, lengths, n_iter)

    @pytest.mark.parametrize("implementation", ["scaling", "log"])
    def test__check_and_set_categorical_n_features(self, implementation):
        h = self.new_hmm(implementation)
        h._check_and_set_n_features(np.array([[0, 0, 2, 1, 3, 1, 1]]).T)
        h._check_and_set_n_features(np.array([[0, 0, 1, 3, 1]], np.uint8))
        with pytest.raises(ValueError):  # non-integral
            h._check_and_set_n_features(np.array([[0., 2., 1., 3.]]))
        with pytest.raises(ValueError):  # negative integers
            h._check_and_set_n_features(np.array([[0, -2, 1, 3, 1, 1]]))

hmmlearn-0.3.0/lib/hmmlearn/tests/test_gaussian_hmm.py

import numpy as np
from numpy.testing import assert_allclose
import pytest
from sklearn.utils import check_random_state

from .. import hmm
from .
import assert_log_likelihood_increasing, make_covar_matrix, normalized class GaussianHMMTestMixin: covariance_type = None # set by subclasses @pytest.fixture(autouse=True) def setup(self): self.prng = prng = np.random.RandomState(10) self.n_components = n_components = 3 self.n_features = n_features = 3 self.startprob = normalized(prng.rand(n_components)) self.transmat = normalized( prng.rand(n_components, n_components), axis=1) self.means = prng.randint(-20, 20, (n_components, n_features)) self.covars = make_covar_matrix( self.covariance_type, n_components, n_features, random_state=prng) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_bad_covariance_type(self, implementation): with pytest.raises(ValueError): h = hmm.GaussianHMM(20, implementation=implementation, covariance_type='badcovariance_type') h.means_ = self.means h.covars_ = [] h.startprob_ = self.startprob h.transmat_ = self.transmat h._check() @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_score_samples_and_decode(self, implementation): h = hmm.GaussianHMM(self.n_components, self.covariance_type, init_params="st", implementation=implementation) h.means_ = self.means h.covars_ = self.covars # Make sure the means are far apart so posteriors.argmax() # picks the actual component used to generate the observations. h.means_ = 20 * h.means_ gaussidx = np.repeat(np.arange(self.n_components), 5) n_samples = len(gaussidx) X = (self.prng.randn(n_samples, self.n_features) + h.means_[gaussidx]) h._init(X, [n_samples]) ll, posteriors = h.score_samples(X) assert posteriors.shape == (n_samples, self.n_components) assert_allclose(posteriors.sum(axis=1), np.ones(n_samples)) viterbi_ll, stateseq = h.decode(X) assert_allclose(stateseq, gaussidx) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_sample(self, implementation, n=1000): h = hmm.GaussianHMM(self.n_components, self.covariance_type, implementation=implementation) h.startprob_ = self.startprob h.transmat_ = self.transmat # Make sure the means are far apart so posteriors.argmax() # picks the actual component used to generate the observations. h.means_ = 20 * self.means h.covars_ = np.maximum(self.covars, 0.1) X, state_sequence = h.sample(n, random_state=self.prng) assert X.shape == (n, self.n_features) assert len(state_sequence) == n @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit(self, implementation, params='stmc', n_iter=5, **kwargs): h = hmm.GaussianHMM(self.n_components, self.covariance_type, implementation=implementation) h.startprob_ = self.startprob h.transmat_ = normalized( self.transmat + np.diag(self.prng.rand(self.n_components)), 1) h.means_ = 20 * self.means h.covars_ = self.covars lengths = [10] * 10 X, _state_sequence = h.sample(sum(lengths), random_state=self.prng) # Mess up the parameters and see if we can re-learn them. 
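        # (As in the sibling CategoricalHMM / MultinomialHMM fit tests, the
        # usual recipe is to re-draw startprob_, transmat_ and the emission
        # parameters at random with ``normalized`` before refitting, so that
        # EM has to recover them from the sampled data.)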
# TODO: change the params and uncomment the check h.fit(X, lengths=lengths) # assert_log_likelihood_increasing(h, X, lengths, n_iter) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_criterion(self, implementation): random_state = check_random_state(42) m1 = hmm.GaussianHMM(self.n_components, init_params="", covariance_type=self.covariance_type) m1.startprob_ = self.startprob m1.transmat_ = self.transmat m1.means_ = self.means * 10 m1.covars_ = self.covars X, _ = m1.sample(2000, random_state=random_state) aic = [] bic = [] ns = [2, 3, 4] for n in ns: h = hmm.GaussianHMM(n, self.covariance_type, n_iter=500, random_state=random_state, implementation=implementation) h.fit(X) aic.append(h.aic(X)) bic.append(h.bic(X)) assert np.all(aic) > 0 assert np.all(bic) > 0 # AIC / BIC pick the right model occasionally # assert ns[np.argmin(aic)] == self.n_components # assert ns[np.argmin(bic)] == self.n_components @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_ignored_init_warns(self, implementation, caplog): h = hmm.GaussianHMM(self.n_components, self.covariance_type, implementation=implementation) h.startprob_ = self.startprob h.fit(np.random.randn(100, self.n_components)) assert len(caplog.records) == 1, caplog assert "will be overwritten" in caplog.records[0].getMessage() @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_too_little_data(self, implementation, caplog): h = hmm.GaussianHMM( self.n_components, self.covariance_type, init_params="", implementation=implementation) h.startprob_ = self.startprob h.transmat_ = self.transmat h.means_ = 20 * self.means h.covars_ = np.maximum(self.covars, 0.1) h._init(np.random.randn(5, self.n_components), 5) assert len(caplog.records) == 1 assert "degenerate solution" in caplog.records[0].getMessage() @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_sequences_of_different_length(self, implementation): lengths = [3, 4, 5] X = self.prng.rand(sum(lengths), self.n_features) h = hmm.GaussianHMM(self.n_components, self.covariance_type, implementation=implementation) # This shouldn't raise # ValueError: setting an array element with a sequence. h.fit(X, lengths=lengths) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_with_length_one_signal(self, implementation): lengths = [10, 8, 1] X = self.prng.rand(sum(lengths), self.n_features) h = hmm.GaussianHMM(self.n_components, self.covariance_type, implementation=implementation) # This shouldn't raise # ValueError: zero-size array to reduction operation maximum which # has no identity h.fit(X, lengths=lengths) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_zero_variance(self, implementation): # Example from issue #2 on GitHub. 
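        # The first feature below is constant, so its sample variance is
        # zero and a naive covariance estimate is singular; the fit must
        # still run without raising.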
X = np.asarray([ [7.15000000e+02, 5.85000000e+02, 0.00000000e+00, 0.00000000e+00], [7.15000000e+02, 5.20000000e+02, 1.04705811e+00, -6.03696289e+01], [7.15000000e+02, 4.55000000e+02, 7.20886230e-01, -5.27055664e+01], [7.15000000e+02, 3.90000000e+02, -4.57946777e-01, -7.80605469e+01], [7.15000000e+02, 3.25000000e+02, -6.43127441e+00, -5.59954834e+01], [7.15000000e+02, 2.60000000e+02, -2.90063477e+00, -7.80220947e+01], [7.15000000e+02, 1.95000000e+02, 8.45532227e+00, -7.03294373e+01], [7.15000000e+02, 1.30000000e+02, 4.09387207e+00, -5.83621216e+01], [7.15000000e+02, 6.50000000e+01, -1.21667480e+00, -4.48131409e+01] ]) h = hmm.GaussianHMM(3, self.covariance_type, implementation=implementation) h.fit(X) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_with_priors(self, implementation, init_params='mc', params='stmc', n_iter=5): startprob_prior = 10 * self.startprob + 2.0 transmat_prior = 10 * self.transmat + 2.0 means_prior = self.means means_weight = 2.0 covars_weight = 2.0 if self.covariance_type in ('full', 'tied'): covars_weight += self.n_features covars_prior = self.covars h = hmm.GaussianHMM(self.n_components, self.covariance_type, implementation=implementation) h.startprob_ = self.startprob h.startprob_prior = startprob_prior h.transmat_ = normalized( self.transmat + np.diag(self.prng.rand(self.n_components)), 1) h.transmat_prior = transmat_prior h.means_ = 20 * self.means h.means_prior = means_prior h.means_weight = means_weight h.covars_ = self.covars h.covars_prior = covars_prior h.covars_weight = covars_weight lengths = [200] * 10 X, _state_sequence = h.sample(sum(lengths), random_state=self.prng) # Re-initialize the parameters and check that we can converge to # the original parameter values. h_learn = hmm.GaussianHMM(self.n_components, self.covariance_type, init_params=init_params, params=params, implementation=implementation,) # don't use random parameters for testing init = 1. / h_learn.n_components h_learn.startprob_ = np.full(h_learn.n_components, init) h_learn.transmat_ = \ np.full((h_learn.n_components, h_learn.n_components), init) h_learn.n_iter = 0 h_learn.fit(X, lengths=lengths) assert_log_likelihood_increasing(h_learn, X, lengths, n_iter) # Make sure we've converged to the right parameters. # a) means assert_allclose(sorted(h.means_.tolist()), sorted(h_learn.means_.tolist()), 0.01) # b) covars are hard to estimate precisely from a relatively small # sample, thus the large threshold assert_allclose( *np.broadcast_arrays(sorted(h._covars_.tolist()), sorted(h_learn._covars_.tolist())), 10) class TestGaussianHMMWithSphericalCovars(GaussianHMMTestMixin): covariance_type = 'spherical' @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_issue_385(self, implementation): model = hmm.GaussianHMM(n_components=2, covariance_type="spherical") model.startprob_ = np.array([0.6, 0.4]) model.transmat_ = np.array([[0.4, 0.6], [0.9, 0.1]]) model.means_ = np.array([[3.0], [5.0]]) model.covars_ = np.array([[[[4.0]]], [[[3.0]]]]) # If setting up an HMM to immediately sample from, the easiest thing is # to just set n_features. We could infer it from self.means_ perhaps. model.n_features = 1 covars = model.covars_ # Make sure covariance is of correct format - the spherical case would # throw an exception here. 
model.sample(1000) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_startprob_and_transmat(self, implementation): self.test_fit(implementation, 'st') @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_underflow_from_scaling(self, implementation): # Setup an ill-conditioned dataset data1 = self.prng.normal(0, 1, 100).tolist() data2 = self.prng.normal(5, 1, 100).tolist() data3 = self.prng.normal(0, 1, 100).tolist() data4 = self.prng.normal(5, 1, 100).tolist() data = np.concatenate([data1, data2, data3, data4]) # Insert an outlier data[40] = 10000 data2d = data[:, None] lengths = [len(data2d)] h = hmm.GaussianHMM(2, n_iter=100, verbose=True, covariance_type=self.covariance_type, implementation=implementation, init_params="") h.startprob_ = [0.0, 1] h.transmat_ = [[0.4, 0.6], [0.6, 0.4]] h.means_ = [[0], [5]] h.covars_ = [[1], [1]] if implementation == "scaling": with pytest.raises(ValueError): h.fit(data2d, lengths) else: h.fit(data2d, lengths) class TestGaussianHMMWithDiagonalCovars(GaussianHMMTestMixin): covariance_type = 'diag' @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_covar_is_writeable(self, implementation): h = hmm.GaussianHMM(n_components=1, covariance_type="diag", init_params="c", implementation=implementation) X = np.random.normal(size=(1000, 5)) h._init(X, 1000) # np.diag returns a read-only view of the array in NumPy 1.9.X. # Make sure this doesn't prevent us from fitting an HMM with # diagonal covariance matrix. See PR#44 on GitHub for details # and discussion. assert h._covars_.flags["WRITEABLE"] @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_left_right(self, implementation): transmat = np.zeros((self.n_components, self.n_components)) # Left-to-right: each state is connected to itself and its # direct successor. for i in range(self.n_components): if i == self.n_components - 1: transmat[i, i] = 1.0 else: transmat[i, i] = transmat[i, i + 1] = 0.5 # Always start in first state startprob = np.zeros(self.n_components) startprob[0] = 1.0 lengths = [10, 8, 1] X = self.prng.rand(sum(lengths), self.n_features) h = hmm.GaussianHMM(self.n_components, covariance_type="diag", params="mct", init_params="cm", implementation=implementation) h.startprob_ = startprob.copy() h.transmat_ = transmat.copy() h.fit(X) assert (h.startprob_[startprob == 0.0] == 0.0).all() assert (h.transmat_[transmat == 0.0] == 0.0).all() posteriors = h.predict_proba(X) assert not np.isnan(posteriors).any() assert_allclose(posteriors.sum(axis=1), 1.) score, state_sequence = h.decode(X, algorithm="viterbi") assert np.isfinite(score) class TestGaussianHMMWithTiedCovars(GaussianHMMTestMixin): covariance_type = 'tied' class TestGaussianHMMWithFullCovars(GaussianHMMTestMixin): covariance_type = 'full' hmmlearn-0.3.0/lib/hmmlearn/tests/test_gmm_hmm.py000066400000000000000000000076701441755530000221000ustar00rootroot00000000000000import numpy as np from numpy.testing import assert_allclose import pytest from sklearn.mixture import GaussianMixture from sklearn.utils import check_random_state from hmmlearn import hmm from . 
import assert_log_likelihood_increasing, make_covar_matrix, normalized pytestmark = pytest.mark.xfail() def create_random_gmm(n_mix, n_features, covariance_type, prng=0): prng = check_random_state(prng) g = GaussianMixture(n_mix, covariance_type=covariance_type) g.means_ = prng.randint(-20, 20, (n_mix, n_features)) g.covars_ = make_covar_matrix(covariance_type, n_mix, n_features) g.weights_ = normalized(prng.rand(n_mix)) return g class GMMHMMTestMixin: def setup_method(self, method): self.prng = np.random.RandomState(9) self.n_components = 3 self.n_mix = 2 self.n_features = 2 self.startprob = normalized(self.prng.rand(self.n_components)) self.transmat = normalized( self.prng.rand(self.n_components, self.n_components), axis=1) self.gmms = [] for state in range(self.n_components): self.gmms.append(create_random_gmm( self.n_mix, self.n_features, self.covariance_type, prng=self.prng)) def test_score_samples_and_decode(self): h = hmm.GMMHMM(self.n_components, covariance_type=self.covariance_type) h.startprob_ = self.startprob h.transmat_ = self.transmat h.gmms_ = self.gmms # Make sure the means are far apart so posteriors.argmax() # picks the actual component used to generate the observations. for g in h.gmms_: g.means_ *= 20 refstateseq = np.repeat(np.arange(self.n_components), 5) n_samples = len(refstateseq) X = [h.gmms_[x].sample(1).flatten() for x in refstateseq] _ll, posteriors = h.score_samples(X) assert posteriors.shape == (n_samples, self.n_components) assert_allclose(posteriors.sum(axis=1), np.ones(n_samples)) _log_prob, stateseq = h.decode(X) assert_allclose(stateseq, refstateseq) def test_sample(self, n_samples=1000): h = hmm.GMMHMM(self.n_components, covariance_type=self.covariance_type) h.startprob_ = self.startprob h.transmat_ = self.transmat h.gmms_ = self.gmms X, state_sequence = h.sample(n_samples) assert X.shape == (n_samples, self.n_features) assert len(state_sequence) == n_samples @pytest.mark.parametrize("params", ["stmwc", "wt", "m"]) def test_fit(self, params, n_iter=5): h = hmm.GMMHMM(self.n_components, covariance_type=self.covariance_type, covars_prior=1.0) h.startprob_ = self.startprob h.transmat_ = normalized( self.transmat + np.diag(self.prng.rand(self.n_components)), 1) h.gmms_ = self.gmms lengths = [10] * 10 X, _state_sequence = h.sample(sum(lengths), random_state=self.prng) # Mess up the parameters and see if we can re-learn them. h.n_iter = 0 h.fit(X, lengths=lengths) h.transmat_ = normalized(self.prng.rand(self.n_components, self.n_components), axis=1) h.startprob_ = normalized(self.prng.rand(self.n_components)) assert_log_likelihood_increasing(h, X, lengths, n_iter) def test_fit_works_on_sequences_of_different_length(self): lengths = [3, 4, 5] X = self.prng.rand(sum(lengths), self.n_features) h = hmm.GMMHMM(self.n_components, covariance_type=self.covariance_type) # This shouldn't raise # ValueError: setting an array element with a sequence. 
h.fit(X, lengths=lengths) class TestGMMHMMWithDiagCovars(GMMHMMTestMixin): covariance_type = 'diag' @pytest.mark.xfail class TestGMMHMMWithTiedCovars(GMMHMMTestMixin): covariance_type = 'tied' @pytest.mark.xfail class TestGMMHMMWithFullCovars(GMMHMMTestMixin): covariance_type = 'full' hmmlearn-0.3.0/lib/hmmlearn/tests/test_gmm_hmm_multisequence.py000066400000000000000000000334401441755530000250350ustar00rootroot00000000000000import itertools import numpy as np from numpy.testing import assert_allclose import pytest from hmmlearn import hmm from hmmlearn.base import ConvergenceMonitor # Example multi-sequence data, arranged as shape # (n_sequences, n_samples, n_features) # # This data is a reduced subset of data_training.npy # from issue https://github.com/hmmlearn/hmmlearn/issues/410 # illustrating GMMHMM fit diverging during EM iterations. # # Transformations to reduce data volume: # - keep only first 3 of many sequences, discard rest # - keep only first 50 of many samples per sequence, discard rest # - keep only first 4 of 17 features per sample, discard rest EXAMPLE_SEQUENCES_ISSUE_410_PRUNED = np.array( [ np.array( [ [0.00992058, 0.44151747, 0.5395124, 0.40644765], [0.00962487, 0.45613006, 0.52375835, 0.3899082], [0.00915721, 0.47111648, 0.5103008, 0.3846845], [0.00916073, 0.4749602, 0.5241155, 0.39899495], [0.0090966, 0.47398633, 0.53792244, 0.41295874], [0.00953476, 0.47201437, 0.5322343, 0.41661483], [0.00916542, 0.4455471, 0.55598766, 0.40831617], [0.00906925, 0.43173638, 0.56246823, 0.39109665], [0.00826067, 0.4136997, 0.58712745, 0.39158684], [0.00828806, 0.41975173, 0.60497123, 0.38206288], [0.00788883, 0.397979, 0.63639283, 0.3627324], [0.00765208, 0.38908702, 0.65764546, 0.3516956], [0.00738148, 0.38130987, 0.6522844, 0.36725503], [0.00717299, 0.37383446, 0.6722188, 0.37951013], [0.0073711, 0.37058228, 0.6799041, 0.3860375], [0.00728311, 0.37892842, 0.65606904, 0.39165357], [0.00730301, 0.39218283, 0.6332023, 0.3976117], [0.00713718, 0.38652796, 0.6423802, 0.34927416], [0.00683423, 0.3656172, 0.68119335, 0.2912439], [0.00663389, 0.34920084, 0.68535674, 0.28290597], [0.00625478, 0.3525497, 0.6658849, 0.30369937], [0.00614696, 0.35593832, 0.65440905, 0.3450122], [0.00611069, 0.35073754, 0.6559732, 0.33597857], [0.00635504, 0.3360095, 0.6800729, 0.32301348], [0.00617533, 0.3444746, 0.6745925, 0.34155408], [0.00592057, 0.35373318, 0.66947186, 0.32476413], [0.00564618, 0.36178407, 0.6560819, 0.3297305], [0.00572176, 0.37058342, 0.6551206, 0.2967357], [0.00578371, 0.39031005, 0.64601576, 0.33421013], [0.00577161, 0.41922286, 0.6089396, 0.3717376], [0.00579954, 0.41518527, 0.60426843, 0.38774568], [0.00578072, 0.40165138, 0.6203536, 0.34574744], [0.00583212, 0.42201585, 0.60890085, 0.38103116], [0.00572761, 0.40093482, 0.63888615, 0.36249077], [0.00594841, 0.3804375, 0.6576098, 0.37927687], [0.0059343, 0.34200934, 0.693946, 0.3007063], [0.00591482, 0.3709248, 0.66136825, 0.32304856], [0.0055425, 0.41159946, 0.62043166, 0.3460799], [0.00548492, 0.40038764, 0.6440804, 0.33333993], [0.00552325, 0.36867827, 0.6703099, 0.30612737], [0.00553349, 0.35795027, 0.67543924, 0.27393535], [0.00558642, 0.4015568, 0.62600005, 0.31275502], [0.00565522, 0.40925154, 0.6178226, 0.3131643], [0.0058172, 0.42638385, 0.6077434, 0.33476466], [0.00585697, 0.40742254, 0.6218038, 0.37967283], [0.00591527, 0.4296229, 0.6016123, 0.3985932], [0.00604816, 0.43141186, 0.59317786, 0.42083132], [0.00621391, 0.4110697, 0.6092669, 0.38827285], [0.00656536, 0.39309287, 0.60035396, 0.41596898], [0.00693208, 0.37821782, 
0.59813255, 0.4394344], ], dtype=np.float32, ), np.array( [ [0.00318667, 0.48804316, 0.52020603, 0.36232004], [0.00322638, 0.48808283, 0.5341949, 0.37973505], [0.00329762, 0.47688982, 0.5563834, 0.4047565], [0.00321911, 0.48151806, 0.54239404, 0.38407174], [0.00400121, 0.5309283, 0.49719027, 0.40301552], [0.00461331, 0.5856188, 0.44557935, 0.40280044], [0.0048873, 0.59214115, 0.4330637, 0.43839055], [0.00411017, 0.53695357, 0.49013752, 0.3832056], [0.00357234, 0.48548815, 0.54152006, 0.3475358], [0.00341532, 0.46990934, 0.57406586, 0.33449954], [0.00345838, 0.50714695, 0.51190466, 0.38789546], [0.00341552, 0.526225, 0.48219037, 0.41689718], [0.0034434, 0.5293161, 0.47968563, 0.4379618], [0.00342251, 0.49587825, 0.53558546, 0.4021806], [0.00329584, 0.5082987, 0.52165693, 0.4086313], [0.0031443, 0.519554, 0.49245426, 0.3961157], [0.0030792, 0.5455676, 0.45203313, 0.423718], [0.00320465, 0.5504491, 0.43865028, 0.45784852], [0.00329045, 0.52342314, 0.48308045, 0.455756], [0.00342623, 0.50302553, 0.495605, 0.45614803], [0.00347294, 0.48462915, 0.52492356, 0.41467494], [0.00352737, 0.45118907, 0.53679097, 0.38664353], [0.00349953, 0.40440804, 0.59128445, 0.3342833], [0.00343233, 0.385624, 0.61139005, 0.29829293], [0.00333838, 0.4443106, 0.55629146, 0.3360141], [0.00341689, 0.47139308, 0.5338217, 0.34617022], [0.00349784, 0.49085665, 0.5179333, 0.38301566], [0.00354852, 0.45714432, 0.5581238, 0.3514836], [0.00336909, 0.4432368, 0.5735479, 0.37587976], [0.00334855, 0.42174804, 0.60338825, 0.36684743], [0.00340526, 0.4299499, 0.5964124, 0.38658547], [0.00355591, 0.44496518, 0.5775449, 0.37693518], [0.00368621, 0.4641773, 0.54175687, 0.38873497], [0.00392683, 0.48638234, 0.52129495, 0.40342456], [0.00444013, 0.5311254, 0.4699971, 0.41572142], [0.00559078, 0.5725662, 0.42817408, 0.42607802], [0.00651121, 0.60758656, 0.3687854, 0.45583618], [0.00731191, 0.60793436, 0.36194792, 0.46369436], [0.0075791, 0.56968933, 0.409531, 0.40896642], [0.00813889, 0.5602184, 0.43285215, 0.35258207], [0.00887025, 0.5855726, 0.40227312, 0.35542902], [0.00897418, 0.609533, 0.35594714, 0.3942479], [0.01022383, 0.6129873, 0.3352133, 0.47778368], [0.0095141, 0.5595026, 0.4162315, 0.45395738], [0.00841102, 0.49951875, 0.48765272, 0.4517436], [0.00826918, 0.45066664, 0.5471808, 0.3476721], [0.01868482, 0.39936644, 0.55312306, 0.43391562], [0.04211371, 0.3675188, 0.56271124, 0.53548455], [0.07297403, 0.3576941, 0.57314515, 0.6578705], [0.10756538, 0.413695, 0.5337025, 0.653491], ], dtype=np.float32, ), np.array( [ [0.00302289, 0.47364405, 0.53537434, 0.45129624], [0.00311589, 0.45328405, 0.5570029, 0.42494437], [0.00315774, 0.44939527, 0.5723259, 0.4117937], [0.00314883, 0.46795434, 0.54135257, 0.3953221], [0.00309122, 0.49266115, 0.5156292, 0.4080569], [0.00312682, 0.49728185, 0.51451075, 0.41902116], [0.00323118, 0.49948877, 0.5244724, 0.45043528], [0.00325324, 0.47703367, 0.5608516, 0.42876282], [0.00322127, 0.4831656, 0.5520302, 0.43367508], [0.00314415, 0.5081604, 0.5305388, 0.47354758], [0.00307265, 0.51409185, 0.5208431, 0.46550223], [0.00312928, 0.5049119, 0.5267695, 0.44442567], [0.00313668, 0.47089654, 0.55917096, 0.37248307], [0.00320294, 0.45442477, 0.57189673, 0.38878405], [0.00317113, 0.43952465, 0.59469926, 0.3535012], [0.00323172, 0.46230638, 0.5713085, 0.39267808], [0.00323527, 0.50055844, 0.52773774, 0.4445253], [0.00322912, 0.5141386, 0.501789, 0.488796], [0.00312394, 0.53217465, 0.47047156, 0.49383518], [0.00305265, 0.5208433, 0.50014937, 0.459741], [0.00305853, 0.51386, 0.5120822, 0.43915167], 
[0.00311382, 0.47278965, 0.57036394, 0.45423037], [0.00318763, 0.46807718, 0.5674287, 0.43843716], [0.00318154, 0.47042188, 0.5779207, 0.44580007], [0.00303793, 0.46571356, 0.5818657, 0.4107192], [0.00299003, 0.44406536, 0.60040486, 0.38005733], [0.00296462, 0.44092676, 0.59199274, 0.3560812], [0.00307418, 0.45244223, 0.56540257, 0.33132356], [0.00302929, 0.47221994, 0.5470938, 0.36822143], [0.00301338, 0.47062206, 0.54911035, 0.3680197], [0.00291349, 0.45564204, 0.5619591, 0.3808152], [0.00295025, 0.47307628, 0.54229873, 0.35219967], [0.00289911, 0.4725018, 0.54288876, 0.38314143], [0.00291358, 0.45272404, 0.5728319, 0.37214962], [0.00294044, 0.4570347, 0.5649049, 0.42110214], [0.00298315, 0.4561437, 0.55468136, 0.40142712], [0.00303691, 0.49613172, 0.52464443, 0.42899716], [0.00300609, 0.49105296, 0.5310325, 0.41341364], [0.00305483, 0.532239, 0.485052, 0.46893936], [0.00298801, 0.51202387, 0.49897632, 0.45285705], [0.0030795, 0.5070261, 0.5062058, 0.44057184], [0.00303104, 0.47002167, 0.56080073, 0.40074068], [0.00308184, 0.4575324, 0.56803167, 0.3928257], [0.00291516, 0.44490653, 0.58391964, 0.40129626], [0.00289649, 0.4531514, 0.5818511, 0.407385], [0.00284487, 0.4488143, 0.5850243, 0.41675568], [0.00291576, 0.4616304, 0.57469726, 0.40793785], [0.00287342, 0.46473294, 0.54888374, 0.41002542], [0.00303153, 0.4966541, 0.5055506, 0.4316879], [0.00327755, 0.45762977, 0.529044, 0.4493881], ], dtype=np.float32, ), ] ) class StrictMonitor(ConvergenceMonitor): @property def converged(self): # The default ConvergenceMonitor regards some scenarios # as "converged" when they have not necessarily converged: # # 1. exhausting max iterations # 2. decreases in log_prob between successive EM iterations # # This second behaviour should (ignoring numerical problems) # never happen if the EM implementation is correct. EM is a # local optimisation method, it may not find a global maxima, # but log_prob should always be non-decreasing between each # pair of successive iterations. assert not np.isnan(self.history[-1]), "log_prob must not be nan" if len(self.history) < 2: return False assert self.history[-1] >= self.history[-2] - self.tol, \ "log_prob must be non-decreasing" return self.history[-1] - self.history[-2] < self.tol def make_permutations(items): sequence_indices = list(range(len(items))) return [list(p) for p in itertools.permutations(sequence_indices)] @pytest.mark.parametrize("covariance_type", ["diag", "spherical", "tied", "full"]) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_gmmhmm_multi_sequence_fit_invariant_to_sequence_ordering( covariance_type, implementation, init_params='mcw', verbose=False ): """ Sanity check GMM-HMM fit behaviour when run on multiple sequences aka multiple frames. Training data consumed during GMM-HMM fit is packed into a single array X containing one or more sequences. In the case where there are two or more input sequences, the ordering that the sequences are packed into X should not influence the results of the fit. Major differences in convergence during EM iterations by merely permuting sequence order in the input indicates a likely defect in the fit implementation. Note: the ordering of samples inside a given sequence is very meaningful, permuting the order of samples would destroy the the state transition structure in the input data. 
See issue 410 on github: https://github.com/hmmlearn/hmmlearn/issues/410 """ sequence_data = EXAMPLE_SEQUENCES_ISSUE_410_PRUNED scores = [] for p in make_permutations(sequence_data): sequences = sequence_data[p] X = np.concatenate(sequences) lengths = [len(seq) for seq in sequences] model = hmm.GMMHMM( n_components=2, n_mix=2, n_iter=100, covariance_type=covariance_type, verbose=verbose, init_params=init_params, random_state=1234, implementation=implementation ) # don't use random parameters for testing init = 1. / model.n_components model.startprob_ = np.full(model.n_components, init) model.transmat_ = \ np.full((model.n_components, model.n_components), init) model.monitor_ = StrictMonitor( model.monitor_.tol, model.monitor_.n_iter, model.monitor_.verbose, ) model.fit(X, lengths) assert model.monitor_.converged scores.append(model.score(X, lengths)) # Choice of rtol value is ad-hoc, no theoretical motivation. assert_allclose(scores, np.mean(scores), rtol=5e-03) hmmlearn-0.3.0/lib/hmmlearn/tests/test_gmm_hmm_new.py000066400000000000000000000237401441755530000227450ustar00rootroot00000000000000import numpy as np from numpy.testing import assert_allclose, assert_array_less import pytest from sklearn.utils import check_random_state from ..hmm import GMMHMM from .test_gmm_hmm import create_random_gmm from . import assert_log_likelihood_increasing, normalized def sample_from_parallelepiped(low, high, n_samples, random_state): (n_features,) = low.shape X = np.zeros((n_samples, n_features)) for i in range(n_features): X[:, i] = random_state.uniform(low[i], high[i], n_samples) return X def prep_params(n_comps, n_mix, n_features, covar_type, low, high, random_state): # the idea is to generate ``n_comps`` bounding boxes and then # generate ``n_mix`` mixture means in each of them dim_lims = np.zeros((n_comps + 1, n_features)) # this generates a sequence of coordinates, which are then used as # vertices of bounding boxes for mixtures dim_lims[1:] = np.cumsum( random_state.uniform(low, high, (n_comps, n_features)), axis=0 ) means = np.zeros((n_comps, n_mix, n_features)) for i, (left, right) in enumerate(zip(dim_lims, dim_lims[1:])): means[i] = sample_from_parallelepiped(left, right, n_mix, random_state) startprob = np.zeros(n_comps) startprob[0] = 1 transmat = normalized(random_state.uniform(size=(n_comps, n_comps)), axis=1) if covar_type == "spherical": covs = random_state.uniform(0.1, 5, size=(n_comps, n_mix)) elif covar_type == "diag": covs = random_state.uniform(0.1, 5, size=(n_comps, n_mix, n_features)) elif covar_type == "tied": covs = np.zeros((n_comps, n_features, n_features)) for i in range(n_comps): low = random_state.uniform(-2, 2, (n_features, n_features)) covs[i] = low.T @ low elif covar_type == "full": covs = np.zeros((n_comps, n_mix, n_features, n_features)) for i in range(n_comps): for j in range(n_mix): low = random_state.uniform(-2, 2, size=(n_features, n_features)) covs[i, j] = low.T @ low weights = normalized(random_state.uniform(size=(n_comps, n_mix)), axis=1) return covs, means, startprob, transmat, weights class GMMHMMTestMixin: n_components = 3 n_mix = 2 n_features = 2 low, high = 10, 15 def new_hmm(self, implementation): prng = np.random.RandomState(14) covars, means, startprob, transmat, weights = prep_params( self.n_components, self.n_mix, self.n_features, self.covariance_type, self.low, self.high, prng) h = GMMHMM(n_components=self.n_components, n_mix=self.n_mix, covariance_type=self.covariance_type, random_state=prng, implementation=implementation) h.startprob_ = startprob 
h.transmat_ = transmat h.weights_ = weights h.means_ = means h.covars_ = covars return h @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_check_bad_covariance_type(self, implementation): h = self.new_hmm(implementation) with pytest.raises(ValueError): h.covariance_type = "bad_covariance_type" h._check() @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_check_good_covariance_type(self, implementation): h = self.new_hmm(implementation) h._check() # should not raise any errors @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_sample(self, implementation): n_samples = 1000 h = self.new_hmm(implementation) X, states = h.sample(n_samples) assert X.shape == (n_samples, self.n_features) assert len(states) == n_samples @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_init(self, implementation): n_samples = 1000 h = self.new_hmm(implementation) X, _states = h.sample(n_samples) h._init(X, [n_samples]) h._check() # should not raise any errors @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_score_samples_and_decode(self, implementation): n_samples = 1000 h = self.new_hmm(implementation) X, states = h.sample(n_samples) _ll, posteriors = h.score_samples(X) assert_allclose(np.sum(posteriors, axis=1), np.ones(n_samples)) _viterbi_ll, decoded_states = h.decode(X) assert_allclose(states, decoded_states) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit(self, implementation): n_iter = 5 n_samples = 1000 lengths = None h = self.new_hmm(implementation) X, _state_sequence = h.sample(n_samples) # Mess up the parameters and see if we can re-learn them. covs0, means0, priors0, trans0, weights0 = prep_params( self.n_components, self.n_mix, self.n_features, self.covariance_type, self.low, self.high, np.random.RandomState(15) ) h.covars_ = covs0 * 100 h.means_ = means0 h.startprob_ = priors0 h.transmat_ = trans0 h.weights_ = weights0 assert_log_likelihood_increasing(h, X, lengths, n_iter) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_sparse_data(self, implementation): n_samples = 1000 h = self.new_hmm(implementation) h.means_ *= 1000 # this will put gaussians very far apart X, _states = h.sample(n_samples) # this should not raise # "ValueError: array must not contain infs or NaNs" h._init(X, [1000]) h.fit(X) @pytest.mark.xfail @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_zero_variance(self, implementation): # Example from issue #2 on GitHub. 
# this data has singular covariance matrix X = np.asarray([ [7.15000000e+02, 5.8500000e+02, 0.00000000e+00, 0.00000000e+00], [7.15000000e+02, 5.2000000e+02, 1.04705811e+00, -6.03696289e+01], [7.15000000e+02, 4.5500000e+02, 7.20886230e-01, -5.27055664e+01], [7.15000000e+02, 3.9000000e+02, -4.57946777e-01, -7.80605469e+01], [7.15000000e+02, 3.2500000e+02, -6.43127441e+00, -5.59954834e+01], [7.15000000e+02, 2.6000000e+02, -2.90063477e+00, -7.80220947e+01], [7.15000000e+02, 1.9500000e+02, 8.45532227e+00, -7.03294373e+01], [7.15000000e+02, 1.3000000e+02, 4.09387207e+00, -5.83621216e+01], [7.15000000e+02, 6.5000000e+01, -1.21667480e+00, -4.48131409e+01] ]) h = self.new_hmm(implementation) h.fit(X) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_criterion(self, implementation): random_state = check_random_state(2013) m1 = self.new_hmm(implementation) # Spread the means out to make this easier m1.means_ *= 10 X, _ = m1.sample(4000, random_state=random_state) aic = [] bic = [] ns = [2, 3, 4, 5] for n in ns: h = GMMHMM(n, n_mix=2, covariance_type=self.covariance_type, random_state=random_state, implementation=implementation) h.fit(X) aic.append(h.aic(X)) bic.append(h.bic(X)) assert np.all(aic) > 0 assert np.all(bic) > 0 # AIC / BIC pick the right model occasionally # assert ns[np.argmin(aic)] == self.n_components # assert ns[np.argmin(bic)] == self.n_components class TestGMMHMMWithSphericalCovars(GMMHMMTestMixin): covariance_type = 'spherical' class TestGMMHMMWithDiagCovars(GMMHMMTestMixin): covariance_type = 'diag' class TestGMMHMMWithTiedCovars(GMMHMMTestMixin): covariance_type = 'tied' class TestGMMHMMWithFullCovars(GMMHMMTestMixin): covariance_type = 'full' class TestGMMHMM_KmeansInit: @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_kmeans(self, implementation): # Generate two isolated cluster. # The second cluster has no. of points less than n_mix. np.random.seed(0) data1 = np.random.uniform(low=0, high=1, size=(100, 2)) data2 = np.random.uniform(low=5, high=6, size=(5, 2)) data = np.r_[data1, data2] model = GMMHMM(n_components=2, n_mix=10, n_iter=5, implementation=implementation) model.fit(data) # _init() should not fail here # test whether the means are bounded by the data lower- and upperbounds assert_array_less(0, model.means_) assert_array_less(model.means_, 6) class TestGMMHMM_MultiSequence: @pytest.mark.parametrize("covtype", ["diag", "spherical", "tied", "full"]) def test_chunked(sellf, covtype, init_params='mcw'): np.random.seed(0) gmm = create_random_gmm(3, 2, covariance_type=covtype, prng=0) gmm.covariances_ = gmm.covars_ data = gmm.sample(n_samples=1000)[0] model1 = GMMHMM(n_components=3, n_mix=2, covariance_type=covtype, random_state=1, init_params=init_params) model2 = GMMHMM(n_components=3, n_mix=2, covariance_type=covtype, random_state=1, init_params=init_params) # don't use random parameters for testing init = 1. 
/ model1.n_components for model in (model1, model2): model.startprob_ = np.full(model.n_components, init) model.transmat_ = \ np.full((model.n_components, model.n_components), init) model1.fit(data) model2.fit(data, lengths=[200] * 5) assert_allclose(model1.means_, model2.means_, rtol=0, atol=1e-2) assert_allclose(model1.covars_, model2.covars_, rtol=0, atol=1e-3) assert_allclose(model1.weights_, model2.weights_, rtol=0, atol=1e-3) assert_allclose(model1.transmat_, model2.transmat_, rtol=0, atol=1e-2) hmmlearn-0.3.0/lib/hmmlearn/tests/test_kl_divergence.py000066400000000000000000000032531441755530000232510ustar00rootroot00000000000000import numpy as np from hmmlearn import _kl_divergence as _kl class TestKLDivergence: def test_dirichlet(self): v1 = [1, 2, 3, 4] v2 = [4, 3, 2, 1] assert _kl.kl_dirichlet(v1, v1) == 0 assert _kl.kl_dirichlet(v2, v2) == 0 assert _kl.kl_dirichlet(v1, v2) > 0 assert _kl.kl_dirichlet(v2, v1) > 0 def test_normal(self): assert _kl.kl_normal_distribution(0, 1, 0, 1) == 0 assert _kl.kl_normal_distribution(0, 1, 1, 1) > 0 def test_multivariate_normal(self): mean_p = [0] var_p = [[1]] kl_equal = _kl.kl_multivariate_normal_distribution( mean_p, var_p, mean_p, var_p) assert kl_equal == 0 # Compare with univariate implementation uv = _kl.kl_normal_distribution(0, 1, 0, 1) assert kl_equal == uv mean_q = [1] var_q = [[1]] kl_ne = _kl.kl_multivariate_normal_distribution( mean_p, var_p, mean_q, var_q) # Compare with univariate implementation uv = _kl.kl_normal_distribution(0, 1, 1, 1) assert kl_ne == uv def test_gamma(self): assert _kl.kl_gamma_distribution(1, .01, 1, .01) == 0 assert _kl.kl_gamma_distribution(1, .01, 2, .01) > 0 assert _kl.kl_gamma_distribution(1, .01, 1, .02) > 0 def test_wishart(self): dof1 = 952 scale1 = np.asarray([[339.8474024737109]]) dof2 = 1.0 scale2 = np.asarray([[0.001]]) kl_equal = _kl.kl_wishart_distribution(dof1, scale1, dof1, scale1) assert kl_equal == 0 kl_ne = _kl.kl_wishart_distribution(dof1, scale1, dof2, scale2) assert kl_ne > 0 hmmlearn-0.3.0/lib/hmmlearn/tests/test_multinomial_hmm.py000066400000000000000000000152221441755530000236420ustar00rootroot00000000000000import numpy as np from numpy.testing import assert_allclose import pytest from hmmlearn import hmm from . 
import assert_log_likelihood_increasing, normalized class TestMultinomialHMM: n_components = 2 n_features = 4 n_trials = 5 def new_hmm(self, impl): h = hmm.MultinomialHMM( n_components=self.n_components, n_trials=self.n_trials, implementation=impl) h.startprob_ = np.array([.6, .4]) h.transmat_ = np.array([[.8, .2], [.2, .8]]) h.emissionprob_ = np.array([[.5, .3, .1, .1], [.1, .1, .4, .4]]) return h @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_attributes(self, implementation): with pytest.raises(ValueError): h = self.new_hmm(implementation) h.emissionprob_ = [] h._check() with pytest.raises(ValueError): h.emissionprob_ = np.zeros((self.n_components - 2, self.n_features)) h._check() @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_score_samples(self, implementation): X = np.array([ [1, 1, 3, 0], [3, 1, 1, 0], [3, 0, 2, 0], [2, 2, 0, 1], [2, 2, 0, 1], [0, 1, 1, 3], [1, 0, 3, 1], [2, 0, 1, 2], [0, 2, 1, 2], [1, 0, 1, 3], ]) n_samples = X.shape[0] h = self.new_hmm(implementation) ll, posteriors = h.score_samples(X) assert posteriors.shape == (n_samples, self.n_components) assert_allclose(posteriors.sum(axis=1), np.ones(n_samples)) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_sample(self, implementation, n_samples=1000): h = self.new_hmm(implementation) X, state_sequence = h.sample(n_samples) assert X.ndim == 2 assert len(X) == len(state_sequence) == n_samples assert len(np.unique(X)) == self.n_trials + 1 assert (X.sum(axis=1) == self.n_trials).all() h.n_trials = None with pytest.raises(ValueError): h.sample(n_samples) h.n_trials = [1, 2, 3] with pytest.raises(ValueError): h.sample(n_samples) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit(self, implementation, params='ste', n_iter=5): h = self.new_hmm(implementation) h.params = params lengths = np.array([10] * 10) X, _state_sequence = h.sample(lengths.sum()) # Mess up the parameters and see if we can re-learn them. h.startprob_ = normalized(np.random.random(self.n_components)) h.transmat_ = normalized( np.random.random((self.n_components, self.n_components)), axis=1) h.emissionprob_ = normalized( np.random.random((self.n_components, self.n_features)), axis=1) # Also mess up trial counts. 
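        # Setting ``n_trials`` back to None makes the model re-infer the
        # trial count from the row sums of X, and doubling every other row
        # makes that count vary across samples.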
h.n_trials = None X[::2] *= 2 assert_log_likelihood_increasing(h, X, lengths, n_iter) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_emissionprob(self, implementation): self.test_fit(implementation, 'e') @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_with_init(self, implementation, params='ste', n_iter=5): lengths = [10] * 10 h = self.new_hmm(implementation) X, _state_sequence = h.sample(sum(lengths)) # use init_function to initialize paramerters h = hmm.MultinomialHMM( n_components=self.n_components, n_trials=self.n_trials, params=params, init_params=params) h._init(X, lengths) assert_log_likelihood_increasing(h, X, lengths, n_iter) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test__check_and_set_multinomial_n_features_n_trials( self, implementation): h = hmm.MultinomialHMM( n_components=2, n_trials=None, implementation=implementation) h._check_and_set_n_features( np.array([[0, 2, 3, 0], [1, 0, 2, 2]])) assert (h.n_trials == 5).all() with pytest.raises(ValueError): # wrong dimensions h._check_and_set_n_features( np.array([[0, 0, 2, 1, 3, 1, 1]])) with pytest.raises(ValueError): # not added up to n_trials h._check_and_set_n_features( np.array([[0, 0, 1, 1], [3, 1, 1, 0]])) with pytest.raises(ValueError): # non-integral h._check_and_set_n_features( np.array([[0., 2., 0., 3.], [0.0, 2.5, 2.5, 0.0]])) with pytest.raises(ValueError): # negative integers h._check_and_set_n_features( np.array([[0, -2, 1, 6], [5, 6, -6, 0]])) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_compare_with_categorical_hmm(self, implementation): n_components = 2 # ['Rainy', 'Sunny'] n_features = 3 # ['walk', 'shop', 'clean'] n_trials = 1 startprob = np.array([0.6, 0.4]) transmat = np.array([[0.7, 0.3], [0.4, 0.6]]) emissionprob = np.array([[0.1, 0.4, 0.5], [0.6, 0.3, 0.1]]) h1 = hmm.MultinomialHMM( n_components=n_components, n_trials=n_trials, implementation=implementation) h2 = hmm.CategoricalHMM( n_components=n_components, implementation=implementation) h1.startprob_ = startprob h2.startprob_ = startprob h1.transmat_ = transmat h2.transmat_ = transmat h1.emissionprob_ = emissionprob h2.emissionprob_ = emissionprob X1 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) X2 = [[0], [1], [2]] # different input format for CategoricalHMM log_prob1, state_sequence1 = h1.decode(X1, algorithm="viterbi") log_prob2, state_sequence2 = h2.decode(X2, algorithm="viterbi") assert round(np.exp(log_prob1), 5) == 0.01344 assert round(np.exp(log_prob2), 5) == 0.01344 assert_allclose(state_sequence1, [1, 0, 0]) assert_allclose(state_sequence2, [1, 0, 0]) posteriors1 = h1.predict_proba(X1) assert_allclose(posteriors1, [ [0.23170303, 0.76829697], [0.62406281, 0.37593719], [0.86397706, 0.13602294], ], rtol=0, atol=1e-6) posteriors2 = h2.predict_proba(X2) assert_allclose(posteriors2, [ [0.23170303, 0.76829697], [0.62406281, 0.37593719], [0.86397706, 0.13602294], ], rtol=0, atol=1e-6) hmmlearn-0.3.0/lib/hmmlearn/tests/test_poisson_hmm.py000066400000000000000000000070721441755530000230060ustar00rootroot00000000000000import numpy as np from numpy.testing import assert_allclose import pytest from sklearn.utils import check_random_state from hmmlearn import hmm from . 
import assert_log_likelihood_increasing, normalized class TestPoissonHMM: n_components = 2 n_features = 3 def new_hmm(self, impl): h = hmm.PoissonHMM(self.n_components, implementation=impl, random_state=0) h.startprob_ = np.array([0.6, 0.4]) h.transmat_ = np.array([[0.7, 0.3], [0.4, 0.6]]) h.lambdas_ = np.array([[3.1, 1.4, 4.5], [1.6, 5.3, 0.1]]) return h @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_attributes(self, implementation): with pytest.raises(ValueError): h = self.new_hmm(implementation) h.lambdas_ = [] h._check() with pytest.raises(ValueError): h.lambdas_ = np.zeros((self.n_components - 2, self.n_features)) h._check() @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_score_samples(self, implementation, n_samples=1000): h = self.new_hmm(implementation) X, state_sequence = h.sample(n_samples) assert X.ndim == 2 assert len(X) == len(state_sequence) == n_samples ll, posteriors = h.score_samples(X) assert posteriors.shape == (n_samples, self.n_components) assert_allclose(posteriors.sum(axis=1), np.ones(n_samples)) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit(self, implementation, params='stl', n_iter=5): h = self.new_hmm(implementation) h.params = params lengths = np.array([10] * 10) X, _state_sequence = h.sample(lengths.sum()) # Mess up the parameters and see if we can re-learn them. np.random.seed(0) h.startprob_ = normalized(np.random.random(self.n_components)) h.transmat_ = normalized( np.random.random((self.n_components, self.n_components)), axis=1) h.lambdas_ = np.random.gamma( shape=2, size=(self.n_components, self.n_features)) assert_log_likelihood_increasing(h, X, lengths, n_iter) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_lambdas(self, implementation): self.test_fit(implementation, 'l') @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_with_init(self, implementation, params='stl', n_iter=5): lengths = [10] * 10 h = self.new_hmm(implementation) X, _state_sequence = h.sample(sum(lengths)) # use init_function to initialize paramerters h = hmm.PoissonHMM(self.n_components, params=params, init_params=params) h._init(X, lengths) assert_log_likelihood_increasing(h, X, lengths, n_iter) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_criterion(self, implementation): random_state = check_random_state(412) m1 = self.new_hmm(implementation) X, _ = m1.sample(2000, random_state=random_state) aic = [] bic = [] ns = [2, 3, 4] for n in ns: h = hmm.PoissonHMM(n, n_iter=500, random_state=random_state, implementation=implementation) h.fit(X) aic.append(h.aic(X)) bic.append(h.bic(X)) assert np.all(aic) > 0 assert np.all(bic) > 0 # AIC / BIC pick the right model occasionally # assert ns[np.argmin(aic)] == 2 # assert ns[np.argmin(bic)] == 2 hmmlearn-0.3.0/lib/hmmlearn/tests/test_utils.py000066400000000000000000000027001441755530000216040ustar00rootroot00000000000000import numpy as np from numpy.testing import assert_allclose from hmmlearn.utils import normalize, fill_covars def test_normalize(): A = np.random.normal(42., size=128) A[np.random.choice(len(A), size=16)] = 0.0 assert (A == 0.0).any() normalize(A) assert_allclose(A.sum(), 1.) def test_normalize_along_axis(): A = np.random.normal(42., size=(128, 4)) for axis in range(A.ndim): A[np.random.choice(len(A), size=16), axis] = 0.0 assert (A[:, axis] == 0.0).any() normalize(A, axis=axis) assert_allclose(A.sum(axis=axis), 1.) 
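# Illustrative usage sketch; the shapes below are arbitrary choices for the
# example, not values used elsewhere in this suite.  ``normalize`` rescales
# an array in place so it sums to 1 along the given axis; ``fill_covars``
# expands a compact covariance representation into one full
# (n_features, n_features) matrix per component.
def _example_normalize_and_fill_covars():
    A = np.random.rand(4, 3)
    normalize(A, axis=1)                    # each row of A now sums to 1
    diag = np.ones((3, 2))                  # (n_components, n_features)
    full = fill_covars(diag, 'diag', 3, 2)  # expands to shape (3, 2, 2)
    return A, full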
def test_fill_covars(): full = np.arange(12).reshape(3, 2, 2) + 1 np.testing.assert_equal(fill_covars(full, 'full', 3, 2), full) diag = np.arange(6).reshape(3, 2) + 1 expected = np.array([[[1, 0], [0, 2]], [[3, 0], [0, 4]], [[5, 0], [0, 6]]]) np.testing.assert_equal(fill_covars(diag, 'diag', 3, 2), expected) tied = np.arange(4).reshape(2, 2) + 1 expected = np.array([[[1, 2], [3, 4]], [[1, 2], [3, 4]], [[1, 2], [3, 4]]]) np.testing.assert_equal(fill_covars(tied, 'tied', 3, 2), expected) spherical = np.array([1, 2, 3]) expected = np.array([[[1, 0], [0, 1]], [[2, 0], [0, 2]], [[3, 0], [0, 3]]]) np.testing.assert_equal( fill_covars(spherical, 'spherical', 3, 2), expected) hmmlearn-0.3.0/lib/hmmlearn/tests/test_variational_categorical.py000066400000000000000000000262451441755530000253240ustar00rootroot00000000000000import numpy as np import pytest from sklearn.utils import check_random_state from hmmlearn import hmm, vhmm from . import ( assert_log_likelihood_increasing, compare_variational_and_em_models, vi_uniform_startprob_and_transmat) class TestVariationalCategorical: @pytest.fixture(autouse=True) def setup(self): # We fix the random state here to demonstrate that the model will # successfully remove "unnecessary" states. In practice, # one should not set the random_state, and perform multiple # training steps, and take the model with the best lower-bound self.n_components = 3 self.implementations = ["scaling", "log"] @staticmethod def get_beal_models(): m1 = hmm.CategoricalHMM(3, init_params="") m1.n_features = 3 m1.startprob_ = np.array([1/3., 1/3., 1/3.]) m1.transmat_ = np.array([[0, 1, 0], [0, 0, 1], [1, 0, 0]]) m1.emissionprob_ = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) m2 = hmm.CategoricalHMM(3) m2.n_features = 3 m2.startprob_ = np.array([1/3., 1/3., 1/3.]) m2.transmat_ = np.array([[0, 0, 1], [1, 0, 0], [0, 1, 0]]) m2.emissionprob_ = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) m3 = hmm.CategoricalHMM(1) m3.n_features = 3 m3.startprob_ = np.array([1]) m3.transmat_ = np.array([[1]]) m3.emissionprob_ = np.array([[0.5, 0.5, 0]]) return m1, m2, m3 @classmethod def get_from_one_beal(cls, N, length, rs=None): # Just fit the first of the beal models model = cls.get_beal_models()[0] sequences = [] lengths = [] for i in range(N): sequences.append( model.sample(length, random_state=check_random_state(rs))[0]) lengths.append(len(sequences[-1])) sequences = np.concatenate(sequences) return sequences, lengths @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_init_priors(self, implementation): sequences, lengths = self.get_from_one_beal(7, 100, None) model = vhmm.VariationalCategoricalHMM( 4, n_iter=500, random_state=1984, init_params="", implementation=implementation) model.pi_prior_ = np.full((4,), .25) model.pi_posterior_ = np.full((4,), 7/4) model.transmat_prior_ = np.full((4, 4), .25) model.transmat_posterior_ = np.full((4, 4), 7/4) model.emissionprob_prior_ = np.full((4, 3), 1/3) model.emissionprob_posterior_ = np.asarray([[.3, .4, .3], [.8, .1, .1], [.2, .2, .6], [.2, .6, .2]]) assert_log_likelihood_increasing(model, sequences, lengths, 10) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_n_features(self, implementation): sequences, lengths = self.get_from_one_beal(7, 100, None) # Learn n_Features model = vhmm.VariationalCategoricalHMM( 4, implementation=implementation) assert_log_likelihood_increasing(model, sequences, lengths, 10) assert model.n_features == 3 # Respect n_features model = vhmm.VariationalCategoricalHMM( 4, 
implementation=implementation, n_features=5) assert_log_likelihood_increasing(model, sequences, lengths, 10) assert model.n_features == 5 # Too few features with pytest.raises(ValueError): model = vhmm.VariationalCategoricalHMM( 4, n_iter=500, random_state=1984, implementation=implementation) model.n_features = 2 assert_log_likelihood_increasing(model, sequences, lengths, 10) # No Negative Values with pytest.raises(ValueError): model = vhmm.VariationalCategoricalHMM( 4, n_iter=500, random_state=1984, implementation=implementation) sequences[0] = -1 assert_log_likelihood_increasing(model, sequences, lengths, 10) # Must be integers with pytest.raises(ValueError): model = vhmm.VariationalCategoricalHMM( 4, n_iter=500, random_state=1984, implementation=implementation) sequences = sequences.astype(float) assert_log_likelihood_increasing(model, sequences, lengths, 10) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_init_incorrect_priors(self, implementation): sequences, lengths = self.get_from_one_beal(7, 100, None) # Test startprob shape with pytest.raises(ValueError): model = vhmm.VariationalCategoricalHMM( 4, n_iter=500, random_state=1984, init_params="te", implementation=implementation) model.startprob_prior_ = np.full((3,), .25) model.startprob_posterior_ = np.full((4,), 7/4) assert_log_likelihood_increasing(model, sequences, lengths, 10) with pytest.raises(ValueError): model = vhmm.VariationalCategoricalHMM( 4, n_iter=500, random_state=1984, init_params="te", implementation=implementation) model.startprob_prior_ = np.full((4,), .25) model.startprob_posterior_ = np.full((3,), 7/4) assert_log_likelihood_increasing(model, sequences, lengths, 10) # Test transmat shape with pytest.raises(ValueError): model = vhmm.VariationalCategoricalHMM( 4, n_iter=500, random_state=1984, init_params="se", implementation=implementation) model.transmat_prior_ = np.full((3, 3), .25) model.transmat_posterior_ = np.full((4, 4), .25) assert_log_likelihood_increasing(model, sequences, lengths, 10) with pytest.raises(ValueError): model = vhmm.VariationalCategoricalHMM( 4, n_iter=500, random_state=1984, init_params="se", implementation=implementation) model.transmat_prior_ = np.full((4, 4), .25) model.transmat_posterior_ = np.full((3, 3), 7/4) assert_log_likelihood_increasing(model, sequences, lengths, 10) # Test emission shape with pytest.raises(ValueError): model = vhmm.VariationalCategoricalHMM( 4, n_iter=500, random_state=1984, init_params="st", implementation=implementation) model.emissionprob_prior_ = np.full((3, 3), 1/3) model.emissionprob_posterior_ = np.asarray([[.3, .4, .3], [.8, .1, .1], [.2, .2, .6], [.2, .6, .2]]) assert_log_likelihood_increasing(model, sequences, lengths, 10) # Test too many n_features with pytest.raises(ValueError): model = vhmm.VariationalCategoricalHMM( 4, n_iter=500, random_state=1984, init_params="se", implementation=implementation) model.emissionprob_prior_ = np.full((4, 4), 7/4) model.emissionprob_posterior_ = np.full((4, 4), .25) model.n_features_ = 10 assert_log_likelihood_increasing(model, sequences, lengths, 10) # Too small n_features with pytest.raises(ValueError): model = vhmm.VariationalCategoricalHMM( 4, n_iter=500, random_state=1984, init_params="se", implementation=implementation) model.emissionprob_prior_ = np.full((4, 4), 7/4) model.emissionprob_posterior_ = np.full((4, 4), .25) model.n_features_ = 1 assert_log_likelihood_increasing(model, sequences, lengths, 10) # Test that setting the desired prior value works model = 
vhmm.VariationalCategoricalHMM( 4, n_iter=500, random_state=1984, init_params="ste", implementation=implementation, startprob_prior=1, transmat_prior=2, emissionprob_prior=3) assert_log_likelihood_increasing(model, sequences, lengths, 10) assert np.all(model.startprob_prior_ == 1) assert np.all(model.transmat_prior_ == 2) assert np.all(model.emissionprob_prior_ == 3) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_beal(self, implementation): rs = check_random_state(1984) m1, m2, m3 = self.get_beal_models() sequences = [] lengths = [] for i in range(7): for m in [m1, m2, m3]: sequences.append(m.sample(39, random_state=rs)[0]) lengths.append(len(sequences[-1])) sequences = np.concatenate(sequences) model = vhmm.VariationalCategoricalHMM(12, n_iter=500, implementation=implementation, tol=1e-6, random_state=rs, verbose=False) assert_log_likelihood_increasing(model, sequences, lengths, 100) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_and_compare_with_em(self, implementation): # Explicitly setting Random State to test that certain # model states will become "unused" sequences, lengths = self.get_from_one_beal(7, 100, 1984) model = vhmm.VariationalCategoricalHMM( 4, n_iter=500, random_state=1984, init_params="e", implementation=implementation) vi_uniform_startprob_and_transmat(model, lengths) model.fit(sequences, lengths) # The 1st hidden state will be "unused" assert (model.transmat_posterior_[1, :] == pytest.approx(.25, rel=1e-3)) assert (model.emissionprob_posterior_[1, :] == pytest.approx(.3333, rel=1e-3)) # An EM Model should behave the same behavior as a Variational Model, # When initialized with the normalized probabilities of the mode of the # Variational MOdel. em_hmm = hmm.CategoricalHMM(n_components=4, init_params="") em_hmm.startprob_ = model.startprob_ em_hmm.transmat_ = model.transmat_ em_hmm.emissionprob_ = model.emissionprob_ compare_variational_and_em_models(model, em_hmm, sequences, lengths) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_length_1_sequences(self, implementation): sequences1, lengths1 = self.get_from_one_beal(7, 100, 1984) # Include some length 1 sequences sequences2, lengths2 = self.get_from_one_beal(1, 1, 1984) sequences = np.concatenate([sequences1, sequences2]) lengths = np.concatenate([lengths1, lengths2]) model = vhmm.VariationalCategoricalHMM( 4, n_iter=500, random_state=1984, implementation=implementation) assert_log_likelihood_increasing(model, sequences, lengths, 10) hmmlearn-0.3.0/lib/hmmlearn/tests/test_variational_gaussian.py000066400000000000000000000515521441755530000246600ustar00rootroot00000000000000import numpy as np import pytest from sklearn.utils import check_random_state from hmmlearn import hmm, vhmm from . 
import ( assert_log_likelihood_increasing, compare_variational_and_em_models, make_covar_matrix, normalized, vi_uniform_startprob_and_transmat) def get_mcgrory_titterington(): m1 = hmm.GaussianHMM(4, init_params="") m1.n_features = 4 m1.startprob_ = np.array([1/4., 1/4., 1/4., 1/4.]) m1.transmat_ = np.array([[0.2, 0.2, 0.3, 0.3], [0.3, 0.2, 0.2, 0.3], [0.2, 0.3, 0.3, 0.2], [0.3, 0.3, 0.2, 0.2]]) m1.means_ = np.array([[-1.5], [0], [1.5], [3.]]) m1.covars_ = np.sqrt([[0.25], [0.25], [0.25], [0.25]]) return m1 def get_sequences(length, N, model, rs=None): sequences = [] lengths = [] rs = check_random_state(rs) for i in range(N): sequences.append( model.sample(length, random_state=rs)[0]) lengths.append(len(sequences[-1])) sequences = np.concatenate(sequences) return sequences, lengths class _TestGaussian: @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_random_fit(self, implementation, params='stmc', n_features=3, n_components=3, **kwargs): h = hmm.GaussianHMM(n_components, self.covariance_type, implementation=implementation, init_params="") rs = check_random_state(1) h.startprob_ = normalized(rs.rand(n_components)) h.transmat_ = normalized( rs.rand(n_components, n_components), axis=1) h.means_ = rs.randint(-20, 20, (n_components, n_features)) h.covars_ = make_covar_matrix( self.covariance_type, n_components, n_features, random_state=rs) lengths = [200] * 5 X, _state_sequence = h.sample(sum(lengths), random_state=rs) # Now learn a model model = vhmm.VariationalGaussianHMM( n_components, n_iter=50, tol=1e-9, random_state=rs, covariance_type=self.covariance_type, implementation=implementation) assert_log_likelihood_increasing(model, X, lengths, n_iter=10) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_fit_mcgrory_titterington1d(self, implementation): random_state = check_random_state(234234) # Setup to assure convergence sequences, lengths = get_sequences(500, 1, model=get_mcgrory_titterington(), rs=random_state) model = vhmm.VariationalGaussianHMM( 5, n_iter=1000, tol=1e-9, random_state=random_state, init_params="mc", covariance_type=self.covariance_type, implementation=implementation) vi_uniform_startprob_and_transmat(model, lengths) model.fit(sequences, lengths) # Perform one check that we are converging to the right answer assert (model.means_posterior_[-1][0] == pytest.approx(self.test_fit_mcgrory_titterington1d_mean)), \ model.means_posterior_ em_hmm = hmm.GaussianHMM( n_components=model.n_components, implementation=implementation, covariance_type=self.covariance_type, ) em_hmm.startprob_ = model.startprob_ em_hmm.transmat_ = model.transmat_ em_hmm.means_ = model.means_posterior_ em_hmm.covars_ = model._covars_ compare_variational_and_em_models(model, em_hmm, sequences, lengths) @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_common_initialization(self, implementation): sequences, lengths = get_sequences(50, 10, model=get_mcgrory_titterington()) with pytest.raises(ValueError): model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, covariance_type="incorrect", implementation=implementation) assert_log_likelihood_increasing(model, sequences, lengths, 10) with pytest.raises(ValueError): model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, covariance_type="incorrect", init_params="", implementation=implementation) model.startprob_= np.asarray([.25, .25, .25, .25]) model.score(sequences, lengths) # Manually setup means - should converge model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, 
init_params="stc", covariance_type=self.covariance_type, implementation=implementation) model.means_prior_ = [[1], [1], [1], [1]] model.means_posterior_ = [[2], [1], [3], [4]] model.beta_prior_ = [1, 1, 1, 1] model.beta_posterior_ = [1, 1, 1, 1] assert_log_likelihood_increasing(model, sequences, lengths, 10) # Means have wrong shape with pytest.raises(ValueError): model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, init_params="stc", covariance_type=self.covariance_type, implementation=implementation) model.means_prior_ = [[1], [1], [1]] model.means_posterior_ = [[1], [1], [1], [1]] model.beta_prior_ = [1, 1, 1, 1] model.beta_posterior_ = [1, 1, 1, 1] assert_log_likelihood_increasing(model, sequences, lengths, 10) with pytest.raises(ValueError): model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, init_params="stc", covariance_type=self.covariance_type, implementation=implementation) model.means_prior_ = [[1], [1], [1], [1]] model.means_posterior_ = [[1], [1], [1]] model.beta_prior_ = [1, 1, 1, 1] model.beta_posterior_ = [1, 1, 1, 1] assert_log_likelihood_increasing(model, sequences, lengths, 10) # beta's have wrong shape with pytest.raises(ValueError): model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, init_params="stc", covariance_type=self.covariance_type, implementation=implementation) model.means_prior_ = [[1], [1], [1], [1]] model.means_posterior_ = [[2], [1], [3], [4]] model.beta_prior_ = [1, 1, 1] model.beta_posterior_ = [1, 1, 1, 1] assert_log_likelihood_increasing(model, sequences, lengths, 10) with pytest.raises(ValueError): model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, init_params="stc", covariance_type=self.covariance_type, implementation=implementation) model.means_prior_ = [[1], [1], [1], [1]] model.means_posterior_ = [[2], [1], [3], [4]] model.beta_prior_ = [1, 1, 1, 1] model.beta_posterior_ = [1, 1, 1] assert_log_likelihood_increasing(model, sequences, lengths, 10) class TestFull(_TestGaussian): covariance_type = "full" test_fit_mcgrory_titterington1d_mean = 1.41058519 def new_for_init(self, implementation): model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, init_params="stm", covariance_type=self.covariance_type, implementation=implementation) model.dof_prior_ = [1, 1, 1, 1] model.dof_posterior_ = [1, 1, 1, 1] model.scale_prior_ = [[[2.]], [[2.]], [[2.]], [[2]]] model.scale_posterior_ = [[[2.]], [[2.]], [[2.]], [[2.]]] return model @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_initialization(self, implementation): random_state = check_random_state(234234) sequences, lengths = get_sequences( 50, 10, model=get_mcgrory_titterington()) # dof's have wrong shape with pytest.raises(ValueError): model = self.new_for_init(implementation) model.dof_prior_ = [1, 1, 1] assert_log_likelihood_increasing(model, sequences, lengths, 10) with pytest.raises(ValueError): model = self.new_for_init(implementation) model.dof_posterior_ = [1, 1, 1] assert_log_likelihood_increasing(model, sequences, lengths, 10) # scales's have wrong shape with pytest.raises(ValueError): model = self.new_for_init(implementation) model.scale_prior_ = [[[2.]], [[2.]], [[2.]]] assert_log_likelihood_increasing(model, sequences, lengths, 10) with pytest.raises(ValueError): model = self.new_for_init(implementation) model.scale_posterior_ = [[2.]], [[2.]], [[2.]] # this is wrong assert_log_likelihood_increasing(model, sequences, lengths, 10) # Manually setup covariance with pytest.raises(ValueError): model = vhmm.VariationalGaussianHMM( 
4, n_iter=500, tol=1e-9, init_params="stm", covariance_type="incorrect", implementation=implementation) assert_log_likelihood_increasing(model, sequences, lengths, 10) # Set priors correctly via params model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, random_state=random_state, covariance_type=self.covariance_type, implementation=implementation, means_prior=[[0.], [0.], [0.], [0.]], beta_prior=[2., 2., 2., 2.], dof_prior=[2., 2., 2., 2.], scale_prior=[[[2.]], [[2.]], [[2.]], [[2.]]]) assert_log_likelihood_increasing(model, sequences, lengths, 10) assert np.all(model.means_prior_ == 0) assert np.all(model.beta_prior_ == 2.) assert np.all(model.dof_prior_ == 2.) assert np.all(model.scale_prior_ == 2.) # Manually set everything model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, random_state=random_state, covariance_type=self.covariance_type, implementation=implementation, init_params="", ) model.means_prior_ = [[0.], [0.], [0.], [0.]] model.means_posterior_ = [[2], [1], [3], [4]] model.beta_prior_ = [2., 2., 2., 2.] model.beta_posterior_ = [1, 1, 1, 1] model.dof_prior_ = [2., 2., 2., 2.] model.dof_posterior_ = [1, 1, 1, 1] modelscale_prior_ = [[[2.]], [[2.]], [[2.]], [[2.]]] model.scale_posterior_ = [[[2.]], [[2.]], [[2.]], [[2.]]] assert_log_likelihood_increasing(model, sequences, lengths, 10) class TestTied(_TestGaussian): test_fit_mcgrory_titterington1d_mean = 1.4774254 covariance_type = "tied" def new_for_init(self, implementation): model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, init_params="stm", covariance_type=self.covariance_type, implementation=implementation) model.dof_prior_ = 1 model.dof_posterior_ = 1 model.scale_prior_ = [[2]] model.scale_posterior_ = [[2]] return model @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_initialization(self, implementation): random_state = check_random_state(234234) sequences, lengths = get_sequences( 50, 10, model=get_mcgrory_titterington()) # dof's have wrong shape with pytest.raises(ValueError): model = self.new_for_init(implementation) model.dof_prior_ = [1] assert_log_likelihood_increasing(model, sequences, lengths, 10) with pytest.raises(ValueError): model = self.new_for_init(implementation) model.dof_posterior_ = [1] assert_log_likelihood_increasing(model, sequences, lengths, 10) # scales's have wrong shape with pytest.raises(ValueError): model = self.new_for_init(implementation) model.scale_prior_ = [[[2]]] assert_log_likelihood_increasing(model, sequences, lengths, 10) with pytest.raises(ValueError): model = self.new_for_init(implementation) model.scale_posterior_ = [[[2.]], [[2.]], [[2.]]] # this is wrong assert_log_likelihood_increasing(model, sequences, lengths, 10) # Manually setup covariance with pytest.raises(ValueError): model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, init_params="stm", covariance_type="incorrect", implementation=implementation) assert_log_likelihood_increasing(model, sequences, lengths, 10) # Set priors correctly via params model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, random_state=random_state, covariance_type=self.covariance_type, implementation=implementation, means_prior=[[0.], [0.], [0.], [0.]], beta_prior=[2., 2., 2., 2.], dof_prior=2, scale_prior=[[2]], ) assert_log_likelihood_increasing(model, sequences, lengths, 10) assert np.all(model.means_prior_ == 0) assert np.all(model.beta_prior_ == 2.) assert np.all(model.dof_prior_ == 2.) assert np.all(model.scale_prior_ == 2.) 
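        # With covariance_type="tied" the Wishart parameters are shared by all
        # states: dof_prior/dof_posterior are scalars and
        # scale_prior/scale_posterior have shape (n_features, n_features),
        # which for this one-dimensional data is just [[2]].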
# Manually set everything model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, random_state=random_state, covariance_type=self.covariance_type, implementation=implementation, init_params="", ) model.means_prior_ = [[0.], [0.], [0.], [0.]] model.means_posterior_ = [[2], [1], [3], [4]] model.beta_prior_ = [2., 2., 2., 2.] model.beta_posterior_ = [1, 1, 1, 1] model.dof_prior_ = 2 model.dof_posterior_ = 1 model.scale_prior_ = [[2]] model.scale_posterior_ = [[2]] assert_log_likelihood_increasing(model, sequences, lengths, 10) class TestSpherical(_TestGaussian): test_fit_mcgrory_titterington1d_mean = 1.4105851867634462 covariance_type = "spherical" def new_for_init(self, implementation): model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, init_params="stm", covariance_type=self.covariance_type, implementation=implementation) model.dof_prior_ = [1, 1, 1, 1] model.dof_posterior_ = [1, 1, 1, 1] model.scale_prior_ = [2, 2, 2, 2] model.scale_posterior_ = [2, 2, 2, 2] return model @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_initialization(self, implementation): random_state = check_random_state(234234) sequences, lengths = get_sequences( 50, 10, model=get_mcgrory_titterington()) # dof's have wrong shape with pytest.raises(ValueError): model = self.new_for_init(implementation) model.dof_prior_ = [1, 1, 1] assert_log_likelihood_increasing(model, sequences, lengths, 10) with pytest.raises(ValueError): model = self.new_for_init(implementation) model.dof_posterior_ = [1, 1, 1] assert_log_likelihood_increasing(model, sequences, lengths, 10) # scales's have wrong shape with pytest.raises(ValueError): model = self.new_for_init(implementation) model.scale_prior_ = [2, 2, 2] assert_log_likelihood_increasing(model, sequences, lengths, 10) with pytest.raises(ValueError): model = self.new_for_init(implementation) model.scale_posterior_ = [2, 2, 2] # this is wrong assert_log_likelihood_increasing(model, sequences, lengths, 10) # Manually setup covariance with pytest.raises(ValueError): model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, init_params="stm", covariance_type="incorrect", implementation=implementation) assert_log_likelihood_increasing(model, sequences, lengths, 10) # Set priors correctly via params model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, random_state=random_state, covariance_type=self.covariance_type, implementation=implementation, means_prior=[[0.], [0.], [0.], [0.]], beta_prior=[2., 2., 2., 2.], dof_prior=[2., 2., 2., 2.], scale_prior=[2, 2, 2, 2], ) assert_log_likelihood_increasing(model, sequences, lengths, 10) assert np.all(model.means_prior_ == 0) assert np.all(model.beta_prior_ == 2.) assert np.all(model.dof_prior_ == 2.) assert np.all(model.scale_prior_ == 2.) # Manually set everything model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, random_state=random_state, covariance_type=self.covariance_type, implementation=implementation, init_params="", ) model.means_prior_ = [[0.], [0.], [0.], [0.]] model.means_posterior_ = [[2], [1], [3], [4]] model.beta_prior_ = [2., 2., 2., 2.] model.beta_posterior_ = [1, 1, 1, 1] model.dof_prior_ = [2., 2., 2., 2.] 
model.dof_posterior_ = [1, 1, 1, 1] model.scale_prior_ = [2, 2, 2, 2] model.scale_posterior_ = [2, 2, 2, 2] assert_log_likelihood_increasing(model, sequences, lengths, 10) class TestDiagonal(_TestGaussian): test_fit_mcgrory_titterington1d_mean = 1.410585186763446 covariance_type = "diag" def new_for_init(self, implementation): model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, init_params="stm", covariance_type=self.covariance_type, implementation=implementation) model.dof_prior_ = [1, 1, 1, 1] model.dof_posterior_ = [1, 1, 1, 1] model.scale_prior_ = [[2], [2], [2], [2]] model.scale_posterior_ = [[2], [2], [2], [2]] return model @pytest.mark.parametrize("implementation", ["scaling", "log"]) def test_initialization(self, implementation): random_state = check_random_state(234234) sequences, lengths = get_sequences( 50, 10, model=get_mcgrory_titterington()) # dof's have wrong shape with pytest.raises(ValueError): model = self.new_for_init(implementation) model.dof_prior_ = [1, 1, 1] assert_log_likelihood_increasing(model, sequences, lengths, 10) with pytest.raises(ValueError): model = self.new_for_init(implementation) model.dof_posterior_ = [1, 1, 1] assert_log_likelihood_increasing(model, sequences, lengths, 10) # scales's have wrong shape with pytest.raises(ValueError): model = self.new_for_init(implementation) model.scale_prior_ = [[2], [2], [2]] assert_log_likelihood_increasing(model, sequences, lengths, 10) with pytest.raises(ValueError): model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, init_params="stm", covariance_type=self.covariance_type, implementation=implementation) model.dof_prior_ = [1, 1, 1, 1] model.dof_posterior_ = [1, 1, 1, 1] model.scale_prior_ = [[2], [2], [2], [2]] model.scale_posterior_ = [[2, 2, 2]] # this is wrong assert_log_likelihood_increasing(model, sequences, lengths, 10) # Set priors correctly via params model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, random_state=random_state, covariance_type=self.covariance_type, implementation=implementation, means_prior=[[0.], [0.], [0.], [0.]], beta_prior=[2., 2., 2., 2.], dof_prior=[2., 2., 2., 2.], scale_prior=[[2], [2], [2], [2]] ) assert_log_likelihood_increasing(model, sequences, lengths, 10) assert np.all(model.means_prior_ == 0) assert np.all(model.beta_prior_ == 2.) assert np.all(model.dof_prior_ == 2.) assert np.all(model.scale_prior_ == 2.) # Manually set everything model = vhmm.VariationalGaussianHMM( 4, n_iter=500, tol=1e-9, random_state=random_state, covariance_type=self.covariance_type, implementation=implementation, init_params="", ) model.means_prior_ = [[0.], [0.], [0.], [0.]] model.means_posterior_ = [[2], [1], [3], [4]] model.beta_prior_ = [2., 2., 2., 2.] model.beta_posterior_ = [1, 1, 1, 1] model.dof_prior_ = [2., 2., 2., 2.] model.dof_posterior_ = [1, 1, 1, 1] model.scale_prior_ = [[2], [2], [2], [2]] model.scale_posterior_ =[[2], [2], [2], [2]] assert_log_likelihood_increasing(model, sequences, lengths, 10) hmmlearn-0.3.0/lib/hmmlearn/utils.py000066400000000000000000000034471441755530000174140ustar00rootroot00000000000000import numpy as np from scipy import special def normalize(a, axis=None): """ Normalize the input array so that it sums to 1. Parameters ---------- a : array Non-normalized input data. axis : int Dimension along which normalization is performed. Notes ----- Modifies the input **inplace**. """ a_sum = a.sum(axis) if axis and a.ndim > 1: # Make sure we don't divide by zero. 
a_sum[a_sum == 0] = 1 shape = list(a.shape) shape[axis] = 1 a_sum.shape = shape a /= a_sum def log_normalize(a, axis=None): """ Normalize the input array so that ``sum(exp(a)) == 1``. Parameters ---------- a : array Non-normalized input data. axis : int Dimension along which normalization is performed. Notes ----- Modifies the input **inplace**. """ if axis is not None and a.shape[axis] == 1: # Handle single-state GMMHMM in the degenerate case normalizing a # single -inf to zero. a[:] = 0 else: with np.errstate(under="ignore"): a_lse = special.logsumexp(a, axis, keepdims=True) a -= a_lse def fill_covars(covars, covariance_type='full', n_components=1, n_features=1): if covariance_type == 'full': return covars elif covariance_type == 'diag': return np.array(list(map(np.diag, covars))) elif covariance_type == 'tied': return np.tile(covars, (n_components, 1, 1)) elif covariance_type == 'spherical': # Regardless of what is passed in, we flatten in # and then expand it to the correct shape covars = np.ravel(covars) eye = np.eye(n_features)[np.newaxis, :, :] covars = covars[:, np.newaxis, np.newaxis] return eye * covars hmmlearn-0.3.0/lib/hmmlearn/vhmm.py000066400000000000000000001024751441755530000172240ustar00rootroot00000000000000import logging import numbers import numpy as np from scipy import special from sklearn import cluster from sklearn.utils import check_random_state from . import _kl_divergence as _kl, _utils from ._emissions import BaseCategoricalHMM, BaseGaussianHMM from .base import VariationalBaseHMM from .hmm import COVARIANCE_TYPES from .utils import fill_covars _log = logging.getLogger(__name__) class VariationalCategoricalHMM(BaseCategoricalHMM, VariationalBaseHMM): """ Hidden Markov Model with categorical (discrete) emissions trained using Variational Inference. References: * https://cse.buffalo.edu/faculty/mbeal/thesis/ Attributes ---------- n_features : int Number of possible symbols emitted by the model (in the samples). monitor_ : ConvergenceMonitor Monitor object used to check the convergence of EM. startprob_prior_ : array, shape (n_components, ) Prior for the initial state occupation distribution. startprob_posterior_ : array, shape (n_components, ) Posterior estimate of the state occupation distribution. transmat_prior_ : array, shape (n_components, n_components) Prior for the matrix of transition probabilities between states. transmat_posterior_ : array, shape (n_components, n_components) Posterior estimate of the transition probabilities between states. emissionprob_prior_ : array, shape (n_components, n_features) Prior estimatate of emitting a given symbol when in each state. emissionprob_posterior_ : array, shape (n_components, n_features) Posterior estimate of emitting a given symbol when in each state. Examples -------- >>> from hmmlearn.hmm import VariationalCategoricalHMM >>> VariationalCategoricalHMM(n_components=2) #doctest: +ELLIPSIS VariationalCategoricalHMM(algorithm='viterbi',... """ def __init__(self, n_components=1, startprob_prior=None, transmat_prior=None, emissionprob_prior=None, n_features=None, algorithm="viterbi", random_state=None, n_iter=100, tol=1e-6, verbose=False, params="ste", init_params="ste", implementation="log"): """ Parameters ---------- n_components : int Number of states. startprob_prior : array, shape (n_components, ), optional Parameters of the Dirichlet prior distribution for :attr:`startprob_`. 
transmat_prior : array, shape (n_components, n_components), optional Parameters of the Dirichlet prior distribution for each row of the transition probabilities :attr:`transmat_`. emissionprob_prior : array, shape (n_components, n_features), optional Parameters of the Dirichlet prior distribution for :attr:`emissionprob_`. n_features: int, optional The number of categorical symbols in the HMM. Will be inferred from the data if not set. algorithm : {"viterbi", "map"}, optional Decoder algorithm. random_state: RandomState or an int seed, optional A random number generator instance. n_iter : int, optional Maximum number of iterations to perform. tol : float, optional Convergence threshold. EM will stop if the gain in log-likelihood is below this value. verbose : bool, optional Whether per-iteration convergence reports are printed to :data:`sys.stderr`. Convergence can also be diagnosed using the :attr:`monitor_` attribute. params, init_params : string, optional The parameters that get updated during (``params``) or initialized before (``init_params``) the training. Can contain any combination of 's' for startprob, 't' for transmat, and 'e' for emissionprob. Defaults to all parameters. implementation : string, optional Determines if the forward-backward algorithm is implemented with logarithms ("log"), or using scaling ("scaling"). The default is to use logarithms for backwards compatability. """ super().__init__( n_components=n_components, startprob_prior=startprob_prior, transmat_prior=transmat_prior, algorithm=algorithm, random_state=random_state, n_iter=n_iter, tol=tol, verbose=verbose, params=params, init_params=init_params, implementation=implementation ) self.emissionprob_prior = emissionprob_prior self.n_features = n_features def _init(self, X, lengths): """ Initialize model parameters prior to fitting. Parameters ---------- X : array-like, shape (n_samples, n_features) Feature matrix of individual samples. lengths : array-like of integers, shape (n_sequences, ) Lengths of the individual sequences in ``X``. The sum of these should be ``n_samples``. """ super()._init(X, lengths) random_state = check_random_state(self.random_state) if self._needs_init("e", "emissionprob_posterior_"): emissionprob_init = 1 / self.n_features if self.emissionprob_prior is not None: emissionprob_init = self.emissionprob_prior self.emissionprob_prior_ = np.full( (self.n_components, self.n_features), emissionprob_init) self.emissionprob_posterior_ = random_state.dirichlet( alpha=[emissionprob_init] * self.n_features, size=self.n_components ) * sum(lengths) / self.n_components def _estep_begin(self): super()._estep_begin() # Stored / Computed for efficiency otherwise # it would be done in _compute_subnorm_log_likelihood self.emissionprob_log_subnorm_ = ( special.digamma(self.emissionprob_posterior_) - special.digamma( self.emissionprob_posterior_.sum(axis=1)[:, None])) def _check(self): """ Validate model parameters prior to fitting. Raises ------ ValueError If any of the parameters are invalid, e.g. if :attr:`startprob_` don't sum to 1. 
""" super()._check() self.emissionprob_prior_ = np.atleast_2d(self.emissionprob_prior_) self.emissionprob_posterior_ = \ np.atleast_2d(self.emissionprob_posterior_) if (self.emissionprob_prior_.shape != self.emissionprob_posterior_.shape): raise ValueError( "emissionprob_prior_ and emissionprob_posterior_must" "have shape (n_components, n_features)") if self.n_features is None: self.n_features = self.emissionprob_posterior_.shape[1] if (self.emissionprob_posterior_.shape != (self.n_components, self.n_features)): raise ValueError( f"emissionprob_ must have shape" f"({self.n_components}, {self.n_features})") def _compute_subnorm_log_likelihood(self, X): return self.emissionprob_log_subnorm_[:, X.squeeze(1)].T def _do_mstep(self, stats): """ Perform the M-step of the VB-EM algorithm. Parameters ---------- stats : dict Sufficient statistics updated from all available samples. """ super()._do_mstep(stats) # emissionprob if "e" in self.params: self.emissionprob_posterior_ = ( self.emissionprob_prior_ + stats['obs']) # Provide the normalized probabilities at the posterior median div = self.emissionprob_posterior_.sum(axis=1)[:, None] self.emissionprob_ = self.emissionprob_posterior_ / div def _compute_lower_bound(self, log_prob): """Compute the lower bound of the model.""" # First, get the contribution from the state transitions # and initial probabilities lower_bound = super()._compute_lower_bound(log_prob) # The compute the contributions of the emissionprob emissionprob_lower_bound = 0 for i in range(self.n_components): emissionprob_lower_bound -= _kl.kl_dirichlet( self.emissionprob_posterior_[i], self.emissionprob_prior_[i]) return lower_bound + emissionprob_lower_bound class VariationalGaussianHMM(BaseGaussianHMM, VariationalBaseHMM): """ Hidden Markov Model with Multivariate Gaussian Emissions trained using Variational Inference. References: * https://arxiv.org/abs/1605.08618 * https://core.ac.uk/reader/10883750 * https://theses.gla.ac.uk/6941/7/2005McGroryPhD.pdf Attributes ---------- n_features : int Dimensionality of the Gaussian emissions. monitor_ : ConvergenceMonitor Monitor object used to check the convergence of EM. startprob_prior_ : array, shape (n_components, ) Prior for the initial state occupation distribution. startprob_posterior_ : array, shape (n_components, ) Posterior estimate of the state occupation distribution. transmat_prior_ : array, shape (n_components, n_components) Prior for the matrix of transition probabilities between states. transmat_posterior_ : array, shape (n_components, n_components) Posterior estimate of the transition probabilities between states. means_prior_: array, shape (n_components, n_features) Prior estimates for the mean of each state. means_posterior_: array, shape (n_components, n_features) Posterior estimates for the mean of each state. beta_prior_: array, shape (n_components, ) Prior estimate on the scale of the variance over the means. beta_posterior_: array, shape (n_components, ) Posterior estimate of the scale of the variance over the means. covars_ : array Covariance parameters for each state. The shape depends on :attr:`covariance_type`: * (n_components, ) if "spherical", * (n_components, n_features) if "diag", * (n_components, n_features, n_features) if "full", * (n_features, n_features) if "tied". dof_prior_: int / array The Degrees Of Freedom prior for each state's Wishart distribution. The type depends on :attr:`covariance_type`: * array, shape (n_components, ) if "full", * int if "tied". 
dof_prior_: int / array The Prior on the Degrees Of Freedom for each state's Wishart distribution. The type depends on :attr:`covariance_type`: * array, shape (n_components, ) if "full", * int if "tied". dof_posterior_: int / array The Degrees Of Freedom for each state's Wishart distribution. The type depends on :attr:`covariance_type`: * array, shape (n_components, ) if "full", * int if "tied". scale_prior_ : array Prior for the Inverse scale parameter for each state's Wishart distribution. The wishart distribution is the conjugate prior for the covariance. The shape depends on :attr:`covariance_type`: * (n_components, ) if "spherical", * (n_components, n_features) if "diag", * (n_components, n_features, n_features) if "full", * (n_features, n_features) if "tied". scale_posterior_ : array Inverse scale parameter for each state's wishart distribution. The wishart distribution is the conjugate prior for the covariance. The shape depends on :attr:`covariance_type`: * (n_components, ) if "spherical", * (n_components, n_features) if "diag", * (n_components, n_features, n_features) if "full", * (n_features, n_features) if "tied". Examples -------- >>> from hmmlearn.hmm import VariationalGaussianHMM >>> VariationalGaussianHMM(n_components=2) #doctest: +ELLIPSIS VariationalGaussianHMM(algorithm='viterbi',... """ def __init__(self, n_components=1, covariance_type="full", startprob_prior=None, transmat_prior=None, means_prior=None, beta_prior=None, dof_prior=None, scale_prior=None, algorithm="viterbi", random_state=None, n_iter=100, tol=1e-6, verbose=False, params="stmc", init_params="stmc", implementation="log"): """ Parameters ---------- n_components : int Number of states. covariance_type : {"spherical", "diag", "full", "tied"}, optional The type of covariance parameters to use: * "spherical" --- each state uses a single variance value that applies to all features (default). * "diag" --- each state uses a diagonal covariance matrix. * "full" --- each state uses a full (i.e. unrestricted) covariance matrix. * "tied" --- all states use **the same** full covariance matrix. startprob_prior : array, shape (n_components, ), optional Parameters of the Dirichlet prior distribution for :attr:`startprob_`. transmat_prior : array, shape (n_components, n_components), optional Parameters of the Dirichlet prior distribution for each row of the transition probabilities :attr:`transmat_`. means_prior, beta_prior : array, shape (n_components, ), optional Mean and precision of the Normal prior distribtion for :attr:`means_`. scale_prior, dof_prior : array, optional Parameters of the prior distribution for the covariance matrix :attr:`covars_`. If :attr:`covariance_type` is "spherical" or "diag" the prior is the inverse gamma distribution, otherwise --- the inverse Wishart distribution. The shape of the scale_prior array depends on :attr:`covariance_type`: * (n_components, ) if "spherical", * (n_components, n_features) if "diag", * (n_components, n_features, n_features) if "full", * (n_features, n_features) if "tied". algorithm : {"viterbi", "map"}, optional Decoder algorithm. random_state: RandomState or an int seed, optional A random number generator instance. n_iter : int, optional Maximum number of iterations to perform. tol : float, optional Convergence threshold. EM will stop if the gain in log-likelihood is below this value. verbose : bool, optional Whether per-iteration convergence reports are printed to :data:`sys.stderr`. Convergence can also be diagnosed using the :attr:`monitor_` attribute. 
params, init_params : string, optional The parameters that get updated during (``params``) or initialized before (``init_params``) the training. Can contain any combination of 's' for startprob, 't' for transmat, 'm' for means, and 'c' for covars. Defaults to all parameters. implementation : string, optional Determines if the forward-backward algorithm is implemented with logarithms ("log"), or using scaling ("scaling"). The default is to use logarithms for backwards compatability. """ super().__init__( n_components=n_components, startprob_prior=startprob_prior, transmat_prior=transmat_prior, algorithm=algorithm, random_state=random_state, n_iter=n_iter, tol=tol, verbose=verbose, params=params, init_params=init_params, implementation=implementation ) self.covariance_type = covariance_type self.means_prior = means_prior self.beta_prior = beta_prior self.dof_prior = dof_prior self.scale_prior = scale_prior @property def covars_(self): """Return covars as a full matrix.""" return fill_covars(self._covars_, self.covariance_type, self.n_components, self.n_features) @covars_.setter def covars_(self, covars): covars = np.array(covars, copy=True) _utils._validate_covars(covars, self.covariance_type, self.n_components) self._covars_ = covars @property def means_(self): """ Compat for _BaseGaussianHMM. We return the mean of the approximating distribution, which for us is just `means_posterior_` """ return self.means_posterior_ def _init(self, X, lengths): """ Initialize model parameters prior to fitting. Parameters ---------- X : array-like, shape (n_samples, n_features) Feature matrix of individual samples. lengths : array-like of integers, shape (n_sequences, ) Lengths of the individual sequences in ``X``. The sum of these should be ``n_samples``. """ super()._init(X, lengths) X_mean = X.mean(axis=0) # Kmeans will be used for initializing both the means # and the covariances kmeans = cluster.KMeans(n_clusters=self.n_components, random_state=self.random_state, n_init=10) # sklearn >=1.2 compat. 
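        # The k-means fit below seeds the variational posteriors: the cluster
        # centres initialise means_posterior_, while the per-cluster sample
        # counts initialise beta_posterior_ and dof_posterior_.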
kmeans.fit(X) cluster_counts = np.bincount(kmeans.predict(X)) if (self._needs_init("m", "means_prior_") or self._needs_init("m", "means_posterior_") or self._needs_init("m", "beta_prior_") or self._needs_init("m", "beta_posterior_")): if self.means_prior is None: self.means_prior_ = np.full( (self.n_components, self.n_features), X_mean) else: self.means_prior_ = self.means_prior # Initialize to the data means self.means_posterior_ = np.copy(kmeans.cluster_centers_) if self.beta_prior is None: self.beta_prior_ = np.zeros(self.n_components) + 1 else: self.beta_prior_ = self.beta_prior # Count of items in each cluster self.beta_posterior_ = np.copy(cluster_counts) if (self._needs_init("c", "dof_prior_") or self._needs_init("c", "dof_posterior_") or self._needs_init("c", "scale_prior_") or self._needs_init("c", "scale_posterior_")): if self.covariance_type in ("full", "diag", "spherical"): if self.dof_prior is None: self.dof_prior_ = np.full( (self.n_components,), self.n_features) else: self.dof_prior_ = self.dof_prior self.dof_posterior_ = np.copy(cluster_counts) elif self.covariance_type == "tied": if self.dof_prior is None: self.dof_prior_ = self.n_features else: self.dof_prior_ = self.dof_prior self.dof_posterior_ = cluster_counts.sum() # Covariance posterior comes from the estimate of the data # We store and update both W_k and scale_posterior_, # as they each are used in the EM-like algorithm cv = np.cov(X.T) + 1E-3 * np.eye(X.shape[1]) self.covars_ = \ _utils.distribute_covar_matrix_to_match_covariance_type( cv, self.covariance_type, self.n_components).copy() if self.covariance_type == "full": if self.scale_prior is None: self.scale_prior_ = np.broadcast_to( np.identity(self.n_features) * 1e-3, (self.n_components, self.n_features, self.n_features) ) else: self.scale_prior_ = self.scale_prior self.scale_posterior_ = ( self._covars_ * np.asarray(self.dof_posterior_)[:, None, None]) elif self.covariance_type == "tied": if self.scale_prior is None: self.scale_prior_ = np.identity(self.n_features) * 1e-3 else: self.scale_prior_ = self.scale_prior self.scale_posterior_ = self._covars_ * self.dof_posterior_ elif self.covariance_type == "diag": if self.scale_prior is None: self.scale_prior_ = np.full( (self.n_components, self.n_features), 1e-3) else: self.scale_prior_ = self.scale_prior self.scale_posterior_ = np.einsum( "ij,i->ij",self._covars_, self.dof_posterior_) elif self.covariance_type == "spherical": if self.scale_prior is None: self.scale_prior_ = np.full((self.n_components, ), 1e-3) else: self.scale_prior_ = self.scale_prior self.scale_posterior_ = (self._covars_.mean(axis=1) * self.dof_posterior_) def _get_n_fit_scalars_per_param(self): if self.covariance_type not in COVARIANCE_TYPES: raise ValueError( f"{self.covariance_type} is invalid") nc = self.n_components nf = self.n_features return { "s": nc - 1, "t": nc * (nc - 1), "m": nc * nf + nc, "c": { "full": nc + nc * nf * (nf + 1) // 2, "tied": 1 + nf * (nf + 1) // 2, "diag": nc + nc * nf, "spherical": nc + nc, }[self.covariance_type], } def _check(self): """ Validate model parameters prior to fitting. Raises ------ ValueError If any of the parameters are invalid, e.g. if :attr:`startprob_` don't sum to 1. 
""" if self.covariance_type not in COVARIANCE_TYPES: raise ValueError( f"{self.covariance_type} is invalid") means_shape = (self.n_components, self.n_features) self.means_prior_ = np.asarray(self.means_prior_, dtype=float) self.means_posterior_ = np.asarray(self.means_posterior_, dtype=float) if self.means_prior_.shape != means_shape: raise ValueError( "means_prior_ have shape (n_components, n_features)") if self.means_posterior_.shape != means_shape: raise ValueError( "means_posterior_ must have shape (n_components, n_features)") self.beta_prior_ = np.asarray(self.beta_prior_, dtype=float) self.beta_posterior_ = np.asarray(self.beta_posterior_, dtype=float) if self.beta_prior_.shape != (self.n_components,): raise ValueError( "beta_prior_ have shape (n_components,)") if self.beta_posterior_.shape != (self.n_components,): raise ValueError( "beta_posterior_ must have shape (n_components,)") if self.covariance_type in ("full", "diag", "spherical"): self.dof_prior_ = np.asarray(self.dof_prior_, dtype=float) self.dof_posterior_ = np.asarray(self.dof_posterior_, dtype=float) if self.dof_prior_.shape != (self.n_components,): raise ValueError( "dof_prior_ have shape (n_components,)") if self.dof_posterior_.shape != (self.n_components,): raise ValueError( "dof_posterior_ must have shape (n_components,)") elif self.covariance_type == "tied": if not isinstance(self.dof_prior_, numbers.Number): raise ValueError("dof_prior_ should be numeric") if not isinstance(self.dof_posterior_, numbers.Number): raise ValueError("dof_posterior_ should be numeric") self.scale_prior_ = np.asarray(self.scale_prior_, dtype=float) self.scale_posterior_ = np.asarray(self.scale_posterior_, dtype=float) expected = None if self.covariance_type == "full": expected = (self.n_components, self.n_features, self.n_features) elif self.covariance_type == "tied": expected = (self.n_features, self.n_features) elif self.covariance_type == "diag": expected = (self.n_components, self.n_features) elif self.covariance_type == "spherical": expected = (self.n_components, ) # Now check the W's if self.scale_prior_.shape != expected: raise ValueError(f"scale_prior_ must have shape {expected}, " f"found {self.scale_prior_.shape}") if self.scale_posterior_.shape != expected: raise ValueError(f"scale_posterior_ must have shape {expected}, " f"found {self.scale_posterior_.shape}") def _compute_subnorm_log_likelihood(self, X): # Refer to the Gruhl/Sick paper for the notation # In general, things are neater if we pretend the covariance is # full / tied. Or, we could treat each case separately, and reduce # the number of operations. That's left for the future :-) nf = self.n_features term1 = special.digamma( .5 * (self.dof_posterior_ - np.arange(0, nf)[:, None]) ).sum(axis=0) scale_posterior_ = self.scale_posterior_ if self.covariance_type in ("diag", "spherical"): scale_posterior_ = fill_covars(self.scale_posterior_, self.covariance_type, self.n_components, self.n_features) W_k = np.linalg.inv(scale_posterior_) term1 += nf * np.log(2) + _utils.logdet(W_k) term1 /= 2. 
# We ignore the constant that is typically excluded in the literature # term2 = self.n_features * log(2 * M_PI) / 2 term2 = 0 term3 = nf / self.beta_posterior_ # (X - Means) * W_k * (X-Means)^T * self.dof_posterior_ delta = (X - self.means_posterior_[:, None]) # c is the HMM Component # i is the length of the sequence X # j, k are the n_features # output shape is length * number of components if self.covariance_type in ("full", "diag", "spherical"): dots = np.einsum("cij,cjk,cik,c->ic", delta, W_k, delta, self.dof_posterior_) elif self.covariance_type == "tied": dots = np.einsum("cij,jk,cik,->ic", delta, W_k, delta, self.dof_posterior_) last_term = .5 * (dots + term3) lll = term1 - term2 - last_term return lll def _do_mstep(self, stats): """ Perform the M-step of VB-EM algorithm. Parameters ---------- stats : dict Sufficient statistics updated from all available samples. """ super()._do_mstep(stats) if "m" in self.params: self.beta_posterior_ = self.beta_prior_ + stats['post'] self.means_posterior_ = np.einsum("i,ij->ij", self.beta_prior_, self.means_prior_) self.means_posterior_ += stats['obs'] self.means_posterior_ /= self.beta_posterior_[:, None] if "c" in self.params: if self.covariance_type == "full": # Update DOF self.dof_posterior_ = self.dof_prior_ + stats['post'] # Update scale self.scale_posterior_ = ( self.scale_prior_ + stats['obs*obs.T'] + np.einsum("c,ci,cj->cij", self.beta_prior_, self.means_prior_, self.means_prior_) - np.einsum("c,ci,cj->cij", self.beta_posterior_, self.means_posterior_, self.means_posterior_)) self._covars_ = (self.scale_posterior_ / self.dof_posterior_[:, None, None]) elif self.covariance_type == "tied": # Update DOF self.dof_posterior_ = self.dof_prior_ + stats['post'].sum() # Update scale self.scale_posterior_ = ( self.scale_prior_ + stats['obs*obs.T'].sum(axis=0) + np.einsum("c,ci,cj->ij", self.beta_prior_, self.means_prior_, self.means_prior_) - np.einsum("c,ci,cj->ij", self.beta_posterior_, self.means_posterior_, self.means_posterior_)) self._covars_ = self.scale_posterior_ / self.dof_posterior_ elif self.covariance_type == "diag": # Update DOF self.dof_posterior_ = self.dof_prior_ + stats['post'] # Update scale self.scale_posterior_ = ( self.scale_prior_ + stats['obs**2'] + np.einsum("c,ci,ci->ci", self.beta_prior_, self.means_prior_, self.means_prior_) - np.einsum("c,ci,ci->ci", self.beta_posterior_, self.means_posterior_, self.means_posterior_)) self._covars_ = (self.scale_posterior_ / self.dof_posterior_[:, None]) elif self.covariance_type == "spherical": # Update DOF self.dof_posterior_ = self.dof_prior_ + stats['post'] # Update scale term2 = (stats['obs**2'] + np.einsum("c,ci,ci->ci", self.beta_prior_, self.means_prior_, self.means_prior_) - np.einsum("c,ci,ci->ci", self.beta_posterior_, self.means_posterior_, self.means_posterior_)) self.scale_posterior_ = ( self.scale_prior_ + term2.mean(axis=1)) self.scale_posterior_ = self.scale_posterior_ self._covars_ = (self.scale_posterior_ / self.dof_posterior_) def _compute_lower_bound(self, log_prob): # First, get the contribution from the state transitions # and initial probabilities lower_bound = super()._compute_lower_bound(log_prob) # The compute the contributions of the emissions emissions_lower_bound = 0 # For ease of implementation, pretend everything is shaped like # full covariance. 
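        # (fill_covars expands the diag/spherical/tied parameters so that the
        # per-state Normal and Wishart KL terms below can be evaluated with
        # the full-matrix formulas.)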
scale_posterior_ = self.scale_posterior_ scale_prior_ = self.scale_prior_ if self.covariance_type != "full": scale_posterior_ = fill_covars(self.scale_posterior_, self.covariance_type, self.n_components, self.n_features) scale_prior_ = fill_covars(self.scale_prior_, self.covariance_type, self.n_components, self.n_features) W_k = np.linalg.inv(scale_posterior_) if self.covariance_type != "tied": dof = self.dof_posterior_ else: dof = np.repeat(self.dof_posterior_, self.n_components) for i in range(self.n_components): precision = W_k[i] * dof[i] # KL for the normal distributions term1 = np.linalg.inv(self.beta_posterior_[i] * precision) term2 = np.linalg.inv(self.beta_prior_[i] * precision) kln = _kl.kl_multivariate_normal_distribution( self.means_posterior_[i], term1, self.means_prior_[i], term2, ) emissions_lower_bound -= kln # KL for the wishart distributions klw = 0. if self.covariance_type in ("full", "diag", "spherical"): klw = _kl.kl_wishart_distribution( self.dof_posterior_[i], scale_posterior_[i], self.dof_prior_[i], scale_prior_[i]) elif self.covariance_type == "tied": # Just compute it for the first component if i == 0: klw = _kl.kl_wishart_distribution( self.dof_posterior_, self.scale_posterior_, self.dof_prior_, self.scale_prior_) else: klw = 0 emissions_lower_bound -= klw return lower_bound + emissions_lower_bound def _needs_sufficient_statistics_for_mean(self): return 'm' in self.params or 'c' in self.params def _needs_sufficient_statistics_for_covars(self): return 'c' in self.params hmmlearn-0.3.0/scripts/000077500000000000000000000000001441755530000150105ustar00rootroot00000000000000hmmlearn-0.3.0/scripts/benchmark.py000066400000000000000000000210131441755530000173110ustar00rootroot00000000000000""" A script for testing / benchmarking HMM Implementations """ import argparse import collections import logging import time import hmmlearn.hmm import numpy as np import sklearn.base LOG = logging.getLogger(__file__) class Benchmark: def __init__(self, repeat, n_iter, verbose): self.repeat = repeat self.n_iter = n_iter self.verbose = verbose def benchmark(self, sequences, lengths, model, tag): elapsed = [] for i in range(self.repeat): start = time.time() cloned = sklearn.base.clone(model) cloned.fit(sequences, lengths) end = time.time() elapsed.append(end-start) self.log_one_run(start, end, cloned, tag) return np.asarray(elapsed) def generate_training_sequences(self): pass def new_model(self, implementation): pass def run(self, results_file): runtimes = collections.defaultdict(dict) sequences, lengths = self.generate_training_sequences() for implementation in ["scaling", "log"]: model = self.new_model(implementation) LOG.info(f"{model.__class__.__name__}: testing {implementation}") key = f"{model.__class__.__name__}|EM|hmmlearn-{implementation}" elapsed = self.benchmark(sequences, lengths, model, key) runtimes[key]["mean"] = elapsed.mean() runtimes[key]["std"] = elapsed.std() with open(results_file, mode="w") as fd: fd.write("configuration,mean,std,n_iterations,repeat\n") for key, value in runtimes.items(): fd.write(f"{key},{value['mean']},{value['std']}," f"{self.n_iter},{self.repeat}\n") def log_one_run(self, start, end, model, tag): LOG.info(f"Training Took {end-start} seconds {tag}") LOG.info(f"startprob={model.startprob_}") LOG.info(f"transmat={model.transmat_}") class GaussianBenchmark(Benchmark): def new_model(self, implementation): return hmmlearn.hmm.GaussianHMM( n_components=4, n_iter=self.n_iter, covariance_type="full", implementation=implementation, verbose=self.verbose 
) def generate_training_sequences(self): sampler = hmmlearn.hmm.GaussianHMM( n_components=4, covariance_type="full", init_params="", verbose=self.verbose ) sampler.startprob_ = np.asarray([0, 0, 0, 1]) sampler.transmat_ = np.asarray([ [.2, .2, .3, .3], [.3, .2, .2, .3], [.2, .3, .3, .2], [.3, .3, .2, .2], ]) sampler.means_ = np.asarray([ -1.5, 0, 1.5, 3 ]).reshape(4, 1) sampler.covars_ = np.asarray([ .5, .5, .5, .5 ]).reshape(4, 1, 1,) sequences, states = sampler.sample(50000) lengths = [len(sequences)] return sequences, lengths def log_one_run(self, start, end, model, tag): super().log_one_run(start, end, model, tag) LOG.info(f"means={model.means_}") LOG.info(f"covars={model.covars_}") class MultinomialBenchmark(Benchmark): def new_model(self, implementation): return hmmlearn.hmm.MultinomialHMM( n_components=3, n_iter=self.n_iter, verbose=self.verbose, implementation=implementation ) def generate_training_sequences(self): sampler = hmmlearn.hmm.MultinomialHMM(n_components=3) sampler.startprob_ = np.array([0.6, 0.3, 0.1]) sampler.transmat_ = np.array([[0.6, 0.2, 0.2], [0.3, 0.5, 0.2], [0.4, 0.3, 0.3]]) sampler.emissionprob_ = np.array([ [.1, .5, .1, .3], [.1, .2, .4, .3], [0, .5, .5, .0], ]) sequences, states = sampler.sample(50000) lengths = [len(sequences)] return sequences, lengths def log_one_run(self, start, end, model, tag): super().log_one_run(start, end, model, tag) LOG.info(f"emissions={model.emissionprob_}") class MultivariateGaussianBenchmark(GaussianBenchmark): def generate_training_sequences(self): sampler = hmmlearn.hmm.GaussianHMM( n_components=4, covariance_type="full", init_params="" ) sampler.startprob_ = np.asarray([0, 0, 0, 1]) sampler.transmat_ = np.asarray([ [.2, .2, .3, .3], [.3, .2, .2, .3], [.2, .3, .3, .2], [.3, .3, .2, .2], ]) sampler.means_ = np.asarray([ [-1.5, 0], [0, 0], [1.5, 0], [3, 0] ]) sampler.covars_ = np.asarray([ [[.5, 0], [0, .5]], [[.5, 0], [0, 0.5]], [[.5, 0], [0, .5]], [[0.5, 0], [0, 0.5]], ]) observed, hidden = sampler.sample(50000) lengths = [len(observed)] return observed, lengths class GMMBenchmark(GaussianBenchmark): def generate_training_sequences(self): sampler = hmmlearn.hmm.GMMHMM( n_components=4, n_mix=3, covariance_type="full", init_params="" ) sampler.startprob_ = [.25, .25, .25, .25] sampler.transmat_ = [ [.1, .3, .3, .3], [.3, .1, .3, .3], [.3, .3, .1, .3], [.3, .3, .3, .1], ] sampler.weights_ = [ [.2, .2, .6], [.6, .2, .2], [.2, .6, .2], [.1, .1, .8], ] sampler.means_ = np.asarray([ [[-10], [-12], [-9]], [[-5], [-4], [-3]], [[-1.5], [0], [1.5]], [[5], [7], [9]], ]) sampler.covars_ = np.asarray([ [[[.125]], [[.125]], [[.125]]], [[[.125]], [[.125]], [[.125]]], [[[.125]], [[.125]], [[.125]]], [[[.125]], [[.125]], [[.125]]], ]) n_sequences = 10 length = 5_000 sequences = [] for i in range(n_sequences): sequences.append(sampler.sample(5000)[0]) return np.concatenate(sequences), [length] * n_sequences def new_model(self, implementation): return hmmlearn.hmm.GMMHMM( n_components=4, n_mix=3, n_iter=self.n_iter, covariance_type="full", verbose=self.verbose, implementation=implementation ) def log_one_run(self, start, end, model, tag): super().log_one_run(start, end, model, tag) LOG.info(f"weights_={model.weights_}") def main(): parser = argparse.ArgumentParser() parser.add_argument("--all", action="store_true") parser.add_argument("--categorical", action="store_true") parser.add_argument("--gaussian", action="store_true") parser.add_argument("--multivariate-gaussian", action="store_true") parser.add_argument("--gaussian-mixture", 
action="store_true") parser.add_argument("--repeat", type=int, default=10) parser.add_argument("--verbose", action="store_true") parser.add_argument("--n-iter", type=int, default=100) args = parser.parse_args() if args.all: args.categorical = True args.gaussian = True args.multivariate_gaussian = True args.gaussian_mixture = True if args.categorical: bench = MultinomialBenchmark( repeat=args.repeat, n_iter=args.n_iter, verbose=args.verbose, ) bench.run("categorical.benchmark.csv") if args.gaussian: bench = GaussianBenchmark( repeat=args.repeat, n_iter=args.n_iter, verbose=args.verbose, ) bench.run("gaussian.benchmark.csv") if args.multivariate_gaussian: bench = MultivariateGaussianBenchmark( repeat=args.repeat, n_iter=args.n_iter, verbose=args.verbose, ) bench.run("multivariate_gaussian.benchmark.csv") if args.gaussian_mixture: bench = GMMBenchmark( repeat=args.repeat, n_iter=args.n_iter, verbose=args.verbose, ) bench.run("gmm.benchmark.csv") if __name__ == "__main__": logging.basicConfig( format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.DEBUG ) main() hmmlearn-0.3.0/setup.cfg000066400000000000000000000002161441755530000151410ustar00rootroot00000000000000[tool:pytest] addopts = --doctest-glob=lib/**/*.py --doctest-glob=*.rst [coverage:run] branch = true source = hmmlearn omit = **/_version.py hmmlearn-0.3.0/setup.py000066400000000000000000000051601441755530000150350ustar00rootroot00000000000000# Copyright (C) 2007-2009 Cournapeau David # 2010 Fabian Pedregosa # 2014 Gael Varoquaux # 2014-2016 Sergei Lebedev # 2018- Antony Lee import os os.environ.setdefault("SETUPTOOLS_USE_DISTUTILS", "stdlib") import setuptools from setuptools import Extension, find_packages, setup from setuptools.command.build_ext import build_ext class build_ext(build_ext): def finalize_options(self): from pybind11.setup_helpers import Pybind11Extension self.distribution.ext_modules[:] = [Pybind11Extension( "hmmlearn._hmmc", ["src/_hmmc.cpp"], cxx_std=11)] super().finalize_options() def build_extensions(self): try: self.compiler.compiler_so.remove("-Wstrict-prototypes") except (AttributeError, ValueError): pass super().build_extensions() setup( name="hmmlearn", description="Hidden Markov Models in Python with scikit-learn like API", long_description=open("README.rst", encoding="utf-8").read(), long_description_content_type="text/x-rst", maintainer="Antony Lee", url="https://github.com/hmmlearn/hmmlearn", license="new BSD", classifiers=[ "Development Status :: 3 - Alpha", "License :: OSI Approved", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "Topic :: Software Development", "Topic :: Scientific/Engineering", "Programming Language :: Python", "Programming Language :: Python :: 3", ], cmdclass={"build_ext": build_ext}, py_modules=[], packages=find_packages("lib"), package_dir={"": "lib"}, ext_modules=[Extension("", [])], package_data={}, python_requires=">=3.6", setup_requires=[ "pybind11>=2.6", "setuptools_scm>=3.3", # fallback_version. ], use_scm_version=lambda: { # xref __init__.py "version_scheme": "post-release", "local_scheme": "node-and-date", "write_to": "lib/hmmlearn/_version.py", "fallback_version": "0+unknown", }, install_requires=[ "numpy>=1.10", # np.broadcast_to. "scikit-learn>=0.16,!=0.22.0", # check_array, check_is_fitted. "scipy>=0.19", # scipy.special.logsumexp. 
], extras_require={ "tests": ["pytest"], "docs": [ "matplotlib", "pydata_sphinx_theme", "sphinx>=2.0", "sphinx-gallery", ], }, entry_points={ "console_scripts": [], "gui_scripts": [], }, ) hmmlearn-0.3.0/src/000077500000000000000000000000001441755530000141105ustar00rootroot00000000000000hmmlearn-0.3.0/src/_hmmc.cpp000066400000000000000000000257121441755530000157060ustar00rootroot00000000000000#include #include #include #include namespace py = pybind11; using ssize_t = Py_ssize_t; double logaddexp(double a, double b) { return a == -std::numeric_limits::infinity() ? b : b == -std::numeric_limits::infinity() ? a : std::max(a, b) + std::log1p(std::exp(-std::abs(b - a))); } double logsumexp(double const* v, ssize_t n) { auto max = *std::max_element(v, v + n); if (std::isinf(max)) { return max; } auto acc = 0.; for (auto i = 0; i < n; ++i) { acc += std::exp(v[i] - max); } return std::log(acc) + max; } py::array_t log( py::array_t x_) { auto n = x_.size(); auto ptr = x_.data(); auto log = py::array_t{{n}}; auto log_ptr = log.mutable_data(); for (auto i = 0; i < n; ++i) { *(log_ptr++) = std::log(*(ptr++)); } if (std::fetestexcept(FE_DIVBYZERO)) { std::feclearexcept(FE_DIVBYZERO); // log(0) = -inf, ignore exception. } return log.reshape(std::vector(x_.shape(), x_.shape() + x_.ndim())); } std::tuple, py::array_t> forward_scaling( py::array_t startprob_, py::array_t transmat_, py::array_t frameprob_) { auto min_sum = 1e-300; auto startprob = startprob_.unchecked<1>(); auto transmat = transmat_.unchecked<2>(); auto frameprob = frameprob_.unchecked<2>(); auto ns = frameprob.shape(0), nc = frameprob.shape(1); if (startprob.shape(0) != nc || transmat.shape(0) != nc || transmat.shape(1) != nc) { throw std::invalid_argument{"shape mismatch"}; } auto fwdlattice_ = py::array_t{{ns, nc}}; auto fwd = fwdlattice_.mutable_unchecked<2>(); auto scaling_ = py::array_t{{ns}}; auto scaling = scaling_.mutable_unchecked<1>(); auto log_prob = 0.; py::gil_scoped_release nogil; std::fill_n(fwd.mutable_data(0, 0), fwd.size(), 0); for (auto i = 0; i < nc; ++i) { fwd(0, i) = startprob(i) * frameprob(0, i); } auto sum = std::accumulate(&fwd(0, 0), &fwd(0, nc), 0.); if (sum < min_sum) { throw std::range_error{"forward pass failed with underflow; " "consider using implementation='log' instead"}; } auto scale = scaling(0) = 1. / sum; log_prob -= std::log(scale); for (auto i = 0; i < nc; ++i) { fwd(0, i) *= scale; } for (auto t = 1; t < ns; ++t) { for (auto j = 0; j < nc; ++j) { for (auto i = 0; i < nc; ++i) { fwd(t, j) += fwd(t - 1, i) * transmat(i, j); } fwd(t, j) *= frameprob(t, j); } auto sum = std::accumulate(&fwd(t, 0), &fwd(t, nc), 0.); if (sum < min_sum) { throw std::range_error{"forward pass failed with underflow; " "consider using implementation='log' instead"}; } auto scale = scaling(t) = 1. 
// Forward pass in log space; returns (log_prob, log forward lattice).
std::tuple<double, py::array_t<double>> forward_log(
    py::array_t<double> startprob_,
    py::array_t<double> transmat_,
    py::array_t<double> log_frameprob_)
{
    auto log_startprob_ = log(startprob_);
    auto log_startprob = log_startprob_.unchecked<1>();
    auto log_transmat_ = log(transmat_);
    auto log_transmat = log_transmat_.unchecked<2>();
    auto log_frameprob = log_frameprob_.unchecked<2>();
    auto ns = log_frameprob.shape(0), nc = log_frameprob.shape(1);
    if (log_startprob.shape(0) != nc
        || log_transmat.shape(0) != nc || log_transmat.shape(1) != nc) {
        throw std::invalid_argument{"shape mismatch"};
    }
    auto buf = std::vector<double>(nc);
    auto fwdlattice_ = py::array_t<double>{{ns, nc}};
    auto fwd = fwdlattice_.mutable_unchecked<2>();
    py::gil_scoped_release nogil;
    for (auto i = 0; i < nc; ++i) {
        fwd(0, i) = log_startprob(i) + log_frameprob(0, i);
    }
    for (auto t = 1; t < ns; ++t) {
        for (auto j = 0; j < nc; ++j) {
            for (auto i = 0; i < nc; ++i) {
                buf[i] = fwd(t - 1, i) + log_transmat(i, j);
            }
            fwd(t, j) = logsumexp(buf.data(), nc) + log_frameprob(t, j);
        }
    }
    auto log_prob = logsumexp(&fwd(ns - 1, 0), nc);
    return {log_prob, fwdlattice_};
}

// Scaled backward pass, reusing the scaling factors from the forward pass.
py::array_t<double> backward_scaling(
    py::array_t<double> startprob_,
    py::array_t<double> transmat_,
    py::array_t<double> frameprob_,
    py::array_t<double> scaling_)
{
    auto startprob = startprob_.unchecked<1>();
    auto transmat = transmat_.unchecked<2>();
    auto frameprob = frameprob_.unchecked<2>();
    auto scaling = scaling_.unchecked<1>();
    auto ns = frameprob.shape(0), nc = frameprob.shape(1);
    if (startprob.shape(0) != nc
        || transmat.shape(0) != nc || transmat.shape(1) != nc
        || scaling.shape(0) != ns) {
        throw std::invalid_argument{"shape mismatch"};
    }
    auto bwdlattice_ = py::array_t<double>{{ns, nc}};
    auto bwd = bwdlattice_.mutable_unchecked<2>();
    py::gil_scoped_release nogil;
    std::fill_n(bwd.mutable_data(0, 0), bwd.size(), 0);
    for (auto i = 0; i < nc; ++i) {
        bwd(ns - 1, i) = scaling(ns - 1);
    }
    for (auto t = ns - 2; t >= 0; --t) {
        for (auto i = 0; i < nc; ++i) {
            for (auto j = 0; j < nc; ++j) {
                bwd(t, i) += transmat(i, j) * frameprob(t + 1, j)
                             * bwd(t + 1, j);
            }
            bwd(t, i) *= scaling(t);
        }
    }
    return bwdlattice_;
}

// Backward pass in log space.
py::array_t<double> backward_log(
    py::array_t<double> startprob_,
    py::array_t<double> transmat_,
    py::array_t<double> log_frameprob_)
{
    auto log_startprob_ = log(startprob_);
    auto log_startprob = log_startprob_.unchecked<1>();
    auto log_transmat_ = log(transmat_);
    auto log_transmat = log_transmat_.unchecked<2>();
    auto log_frameprob = log_frameprob_.unchecked<2>();
    auto ns = log_frameprob.shape(0), nc = log_frameprob.shape(1);
    if (log_startprob.shape(0) != nc
        || log_transmat.shape(0) != nc || log_transmat.shape(1) != nc) {
        throw std::invalid_argument{"shape mismatch"};
    }
    auto buf = std::vector<double>(nc);
    auto bwdlattice_ = py::array_t<double>{{ns, nc}};
    auto bwd = bwdlattice_.mutable_unchecked<2>();
    py::gil_scoped_release nogil;
    for (auto i = 0; i < nc; ++i) {
        bwd(ns - 1, i) = 0;
    }
    for (auto t = ns - 2; t >= 0; --t) {
        for (auto i = 0; i < nc; ++i) {
            for (auto j = 0; j < nc; ++j) {
                buf[j] = log_transmat(i, j) + log_frameprob(t + 1, j)
                         + bwd(t + 1, j);
            }
            bwd(t, i) = logsumexp(buf.data(), nc);
        }
    }
    return bwdlattice_;
}

// Sum over t of the transition posteriors xi(t, i, j), scaled variant.
py::array_t<double> compute_scaling_xi_sum(
    py::array_t<double> fwdlattice_,
    py::array_t<double> transmat_,
    py::array_t<double> bwdlattice_,
    py::array_t<double> frameprob_)
{
    auto fwd = fwdlattice_.unchecked<2>();
    auto transmat = transmat_.unchecked<2>();
    auto bwd = bwdlattice_.unchecked<2>();
    auto frameprob = frameprob_.unchecked<2>();
    auto ns = frameprob.shape(0), nc = frameprob.shape(1);
    if (fwd.shape(0) != ns || fwd.shape(1) != nc
        || transmat.shape(0) != nc || transmat.shape(1) != nc
        || bwd.shape(0) != ns || bwd.shape(1) != nc) {
        throw std::invalid_argument{"shape mismatch"};
    }
    auto xi_sum_ = py::array_t<double>{{nc, nc}};
    auto xi_sum = xi_sum_.mutable_unchecked<2>();
    std::fill_n(xi_sum.mutable_data(0, 0), xi_sum.size(), 0);
    py::gil_scoped_release nogil;
    for (auto t = 0; t < ns - 1; ++t) {
        for (auto i = 0; i < nc; ++i) {
            for (auto j = 0; j < nc; ++j) {
                xi_sum(i, j) += fwd(t, i) * transmat(i, j)
                                * frameprob(t + 1, j) * bwd(t + 1, j);
            }
        }
    }
    return xi_sum_;
}
// Log of the summed transition posteriors, computed entirely in log space.
py::array_t<double> compute_log_xi_sum(
    py::array_t<double> fwdlattice_,
    py::array_t<double> transmat_,
    py::array_t<double> bwdlattice_,
    py::array_t<double> log_frameprob_)
{
    auto fwd = fwdlattice_.unchecked<2>();
    auto log_transmat_ = log(transmat_);
    auto log_transmat = log_transmat_.unchecked<2>();
    auto bwd = bwdlattice_.unchecked<2>();
    auto log_frameprob = log_frameprob_.unchecked<2>();
    auto ns = log_frameprob.shape(0), nc = log_frameprob.shape(1);
    if (fwd.shape(0) != ns || fwd.shape(1) != nc
        || log_transmat.shape(0) != nc || log_transmat.shape(1) != nc
        || bwd.shape(0) != ns || bwd.shape(1) != nc) {
        throw std::invalid_argument{"shape mismatch"};
    }
    auto log_prob = logsumexp(&fwd(ns - 1, 0), nc);
    auto log_xi_sum_ = py::array_t<double>{{nc, nc}};
    auto log_xi_sum = log_xi_sum_.mutable_unchecked<2>();
    std::fill_n(log_xi_sum.mutable_data(0, 0), log_xi_sum.size(),
                -std::numeric_limits<double>::infinity());
    py::gil_scoped_release nogil;
    for (auto t = 0; t < ns - 1; ++t) {
        for (auto i = 0; i < nc; ++i) {
            for (auto j = 0; j < nc; ++j) {
                auto log_xi = fwd(t, i) + log_transmat(i, j)
                              + log_frameprob(t + 1, j) + bwd(t + 1, j)
                              - log_prob;
                log_xi_sum(i, j) = logaddexp(log_xi_sum(i, j), log_xi);
            }
        }
    }
    return log_xi_sum_;
}

// Viterbi algorithm; returns (best-path log-probability, state sequence).
std::tuple<double, py::array_t<ssize_t>> viterbi(
    py::array_t<double> startprob_,
    py::array_t<double> transmat_,
    py::array_t<double> log_frameprob_)
{
    auto log_startprob_ = log(startprob_);
    auto log_startprob = log_startprob_.unchecked<1>();
    auto log_transmat_ = log(transmat_);
    auto log_transmat = log_transmat_.unchecked<2>();
    auto log_frameprob = log_frameprob_.unchecked<2>();
    auto ns = log_frameprob.shape(0), nc = log_frameprob.shape(1);
    if (log_startprob.shape(0) != nc
        || log_transmat.shape(0) != nc || log_transmat.shape(1) != nc) {
        throw std::invalid_argument{"shape mismatch"};
    }
    auto state_sequence_ = py::array_t<ssize_t>{{ns}};
    auto viterbi_lattice_ = py::array_t<double>{{ns, nc}};
    auto state_sequence = state_sequence_.mutable_unchecked<1>();
    auto viterbi_lattice = viterbi_lattice_.mutable_unchecked<2>();
    py::gil_scoped_release nogil;
    for (auto i = 0; i < nc; ++i) {
        viterbi_lattice(0, i) = log_startprob(i) + log_frameprob(0, i);
    }
    for (auto t = 1; t < ns; ++t) {
        for (auto i = 0; i < nc; ++i) {
            auto max = -std::numeric_limits<double>::infinity();
            for (auto j = 0; j < nc; ++j) {
                max = std::max(
                    max, viterbi_lattice(t - 1, j) + log_transmat(j, i));
            }
            viterbi_lattice(t, i) = max + log_frameprob(t, i);
        }
    }
    auto row = &viterbi_lattice(ns - 1, 0);
    auto prev = state_sequence(ns - 1) =
        std::max_element(row, row + nc) - row;
    auto log_prob = row[prev];
    for (auto t = ns - 2; t >= 0; --t) {
        auto max = std::make_pair(
            -std::numeric_limits<double>::infinity(), 0);
        for (auto i = 0; i < nc; ++i) {
            max = std::max(
                max, {viterbi_lattice(t, i) + log_transmat(i, prev), i});
        }
        state_sequence(t) = prev = max.second;
    }
    return {log_prob, state_sequence_};
}
PYBIND11_MODULE(_hmmc, m)
{
    m
        .def("forward_scaling", forward_scaling)
        .def("forward_log", forward_log)
        .def("backward_scaling", backward_scaling)
        .def("backward_log", backward_log)
        .def("compute_scaling_xi_sum", compute_scaling_xi_sum)
        .def("compute_log_xi_sum", compute_log_xi_sum)
        .def("viterbi", viterbi)
        ;
}
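For orientation, here is a minimal, illustrative sketch (not part of the archive) of how the routines exported by this extension can be called directly from Python once the package is built. The module path hmmlearn._hmmc and the function names come from setup.py and the PYBIND11_MODULE block above, and the array shapes follow the shape checks in each function; the uniform two-state toy model is invented purely for this example, and hmmlearn's estimators call these routines internally, so end users normally never touch them.

# Illustrative sketch only: drive the compiled _hmmc routines by hand.
import numpy as np
from hmmlearn import _hmmc

ns, nc = 5, 2                                   # n_samples, n_components
startprob = np.full(nc, 1 / nc)                 # shape (nc,)
transmat = np.full((nc, nc), 1 / nc)            # shape (nc, nc)
log_frameprob = np.log(np.full((ns, nc), .5))   # per-frame emission log-probs

# Forward/backward passes in log space.
log_prob, fwdlattice = _hmmc.forward_log(startprob, transmat, log_frameprob)
bwdlattice = _hmmc.backward_log(startprob, transmat, log_frameprob)
# Summed transition posteriors and the most likely state path.
log_xi_sum = _hmmc.compute_log_xi_sum(
    fwdlattice, transmat, bwdlattice, log_frameprob)
best_log_prob, state_sequence = _hmmc.viterbi(
    startprob, transmat, log_frameprob)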