pax_global_header00006660000000000000000000000064132335154050014513gustar00rootroot0000000000000052 comment=e9fb65785fbffc9bca06f7146d46b7bd581750fe pyemd-0.5.1/000077500000000000000000000000001323351540500126345ustar00rootroot00000000000000pyemd-0.5.1/.gitignore000066400000000000000000000001511323351540500146210ustar00rootroot00000000000000__pycache__ .gitconfig .cache .tox .env .ropeproject *.so *.pyc MANIFEST *.egg* build dist pyemd/emd.cpp pyemd-0.5.1/.travis.yml000066400000000000000000000007761323351540500147570ustar00rootroot00000000000000sudo: false language: python python: - '2.7' - '3.4' - '3.5' - '3.6' install: - pip install -r dev_requirements.txt - make build - pip uninstall --yes -r dev_requirements.txt - pip install tox-travis script: tox notifications: email: false slack: rooms: secure: rxQsNRK9XBkBV0pdYuJG+tsN2tky+JUEF5ayDIUAzSaPeB//VVNNofJhcmfNgG1WiEEi6fe0dR/Y6UDsoVyQrbCHO2q2bIVQp6A/63vgz3DcVQzMahB/QVwte7gy02nLf6rS2g3VetVXrTW6OO4Cv7NQrQb58biVFx/yBtQ3qzI= on_success: never on_failure: always pyemd-0.5.1/CONTRIBUTING.md000066400000000000000000000007601323351540500150700ustar00rootroot00000000000000Installation issues =================== Before opening an issue related to installation, please try to install PyEMD in a fresh, empty Python virtual environment and check that the problem persists: ```shell pip install virtualenvwrapper mkvirtualenv pyemd pip install pyemd ``` PyEMD is not officially supported for (but may nonetheless work with) the following: - Anaconda distributions - Windows operating systems However, if you need to use it in these cases, pull requests are welcome! 
pyemd-0.5.1/LICENSE000066400000000000000000000020441323351540500136410ustar00rootroot00000000000000Copyright (c) 2014-2017 Will Mayner Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. pyemd-0.5.1/MANIFEST.in000066400000000000000000000002211323351540500143650ustar00rootroot00000000000000graft pyemd graft test include README.rst include LICENSE include conftest.py global-exclude __pycache__ *.py[cod] global-exclude *.so *.dylib pyemd-0.5.1/Makefile000066400000000000000000000010651323351540500142760ustar00rootroot00000000000000.PHONY: default test build clean dist test-dist check-dist build-dist clean-dist src = pyemd dist_dir = dist default: build test: build py.test build: clean python setup.py build_ext -b . 
clean: rm -f pyemd/*.so dist: build-dist check-dist twine upload $(dist_dir)/* test-dist: build-dist check-dist twine upload --repository-url https://test.pypi.org/legacy/ $(dist_dir)/* check-dist: python setup.py check --restructuredtext --strict build-dist: clean-dist python setup.py sdist bdist_wheel --dist-dir=$(dist_dir) clean-dist: rm -rf $(dist_dir) pyemd-0.5.1/README.rst000066400000000000000000000170571323351540500143350ustar00rootroot00000000000000.. image:: https://img.shields.io/travis/wmayner/pyemd/develop.svg?style=flat-square&maxAge=3600 :target: https://travis-ci.org/wmayner/pyemd .. image:: https://img.shields.io/pypi/pyversions/pyemd.svg?style=flat-square&maxAge=86400 :target: https://wiki.python.org/moin/Python2orPython3 :alt: Python versions badge PyEMD: Fast EMD for Python ========================== PyEMD is a Python wrapper for `Ofir Pele and Michael Werman's implementation `_ of the `Earth Mover's Distance `_ that allows it to be used with NumPy. **If you use this code, please cite the papers listed at the end of this document.** Installation ------------ .. code:: bash pip install pyemd Usage ----- .. code:: python >>> from pyemd import emd >>> import numpy as np >>> first_histogram = np.array([0.0, 1.0]) >>> second_histogram = np.array([5.0, 3.0]) >>> distance_matrix = np.array([[0.0, 0.5], ... [0.5, 0.0]]) >>> emd(first_histogram, second_histogram, distance_matrix) 3.5 You can also get the associated minimum-cost flow: .. code:: python >>> from pyemd import emd_with_flow >>> emd_with_flow(first_histogram, second_histogram, distance_matrix) (3.5, [[0.0, 0.0], [0.0, 1.0]]) You can also calculate the EMD directly from two arrays of observations: .. code:: python >>> from pyemd import emd_samples >>> first_array = [1, 2, 3, 4] >>> second_array = [2, 3, 4, 5] >>> emd_samples(first_array, second_array, bins=2) 0.5 Documentation ------------- emd() ~~~~~ .. 
code:: python emd(first_histogram, second_histogram, distance_matrix, extra_mass_penalty=-1.0) *Arguments:* - ``first_histogram`` *(np.ndarray)*: A 1D array of type ``np.float64`` of length *N*. - ``second_histogram`` *(np.ndarray)*: A 1D array of ``np.float64`` of length *N*. - ``distance_matrix`` *(np.ndarray)*: A 2D array of ``np.float64``, of size at least *N* × *N*. This defines the underlying metric, or ground distance, by giving the pairwise distances between the histogram bins. It must represent a metric; there is no warning if it doesn't. *Keyword Arguments:* - ``extra_mass_penalty`` *(float)*: The penalty for extra mass. If you want the resulting distance to be a metric, it should be at least half the diameter of the space (maximum possible distance between any two points). If you want partial matching you can set it to zero (but then the resulting distance is not guaranteed to be a metric). The default value is ``-1.0``, which means the maximum value in the distance matrix is used. *Returns:* *(float)* The EMD value. ---- emd_with_flow() ~~~~~~~~~~~~~~~ .. code:: python emd_with_flow(first_histogram, second_histogram, distance_matrix, extra_mass_penalty=-1.0) Arguments are the same as for ``emd()``. *Returns:* *(tuple(float, list(list(float))))* The EMD value and the associated minimum-cost flow. ---- emd_samples() ~~~~~~~~~~~~~ .. code:: python emd_samples(first_array, second_array, extra_mass_penalty=-1.0, distance='euclidean', normalized=True, bins='auto', range=None) *Arguments:* - ``first_array`` *(Iterable)*: A 1D array of samples used to generate a histogram. - ``second_array`` *(Iterable)*: A 1D array of samples used to generate a histogram. *Keyword Arguments:* - ``extra_mass_penalty`` *(float)*: Same as for ``emd()``. - ``distance`` *(string or function)*: A string or function implementing a metric on a 1D ``np.ndarray``. Defaults to the Euclidean distance.
Currently limited to 'euclidean' or your own function, which must take a 1D array and return a square 2D array of pairwise distances. - ``normalized`` (*boolean*): If true (default), treat histograms as fractions of the dataset. If false, treat histograms as counts. In the latter case the EMD will vary greatly by array length. - ``bins`` *(int or string)*: The number of bins to include in the generated histogram. If a string, must be one of the bin selection algorithms accepted by ``np.histogram()``. Defaults to ``'auto'``, which gives the maximum of the 'sturges' and 'fd' estimators. - ``range`` *(tuple(int, int))*: The lower and upper range of the bins, passed to ``numpy.histogram()``. Defaults to the range of the union of ``first_array`` and ``second_array``. Note: if the given range is not a superset of the default range, no warning will be given. *Returns:* *(float)* The EMD value between the histograms of ``first_array`` and ``second_array``. ---- Limitations and Caveats ----------------------- - ``emd()`` and ``emd_with_flow()``: - The ``distance_matrix`` is assumed to represent a metric; there is no check to ensure that this is true. See the documentation in ``pyemd/lib/emd_hat.hpp`` for more information. - The histograms and distance matrix must be numpy arrays of type ``np.float64``. The original C++ template function can accept any numerical C++ type, but this wrapper only instantiates the template with ``double`` (Cython converts ``np.float64`` to ``double``). If there's demand, I can add support for other types. - ``emd_with_flow()``: - The flow matrix does not contain the flows to/from the extra mass bin. - ``emd_samples()``: - Using the default ``bins='auto'`` results in an extra call to ``np.histogram()`` to determine the bin lengths, since `the NumPy bin-selectors are not exposed in the public API `_. For performance, you may want to set the bins yourself. 
Contributing ------------ To help develop PyEMD, fork the project on GitHub and install the requirements with ``pip install -r requirements.txt``. The ``Makefile`` defines some tasks to help with development: - ``test``: Run the test suite - ``build``: Generate and compile the Cython extension - ``clean``: Remove the compiled Cython extension - ``default``: Run ``build`` Tests for different Python environments can be run with ``tox``. Credit ------ - All credit for the actual algorithm and implementation goes to `Ofir Pele `_ and `Michael Werman `_. See the `relevant paper `_. - Thanks to the Cython developers for making this kind of wrapper relatively easy to write. Please cite these papers if you use this code: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ofir Pele and Michael Werman. A linear time histogram metric for improved SIFT matching. *Computer Vision - ECCV 2008*, Marseille, France, 2008, pp. 495-508. .. code-block:: latex @INPROCEEDINGS{pele2008, title={A linear time histogram metric for improved sift matching}, author={Pele, Ofir and Werman, Michael}, booktitle={Computer Vision--ECCV 2008}, pages={495--508}, year={2008}, month={October}, publisher={Springer} } Ofir Pele and Michael Werman. Fast and robust earth mover's distances. *Proc. 2009 IEEE 12th Int. Conf. on Computer Vision*, Kyoto, Japan, 2009, pp. 460-467. ..
code-block:: latex @INPROCEEDINGS{pele2009, title={Fast and robust earth mover's distances}, author={Pele, Ofir and Werman, Michael}, booktitle={2009 IEEE 12th International Conference on Computer Vision}, pages={460--467}, year={2009}, month={September}, organization={IEEE} } pyemd-0.5.1/conftest.py000066400000000000000000000001561323351540500150350ustar00rootroot00000000000000#!/usr/bin/env python3 # -*- coding: utf-8 -*- # conftest.py collect_ignore = ["setup.py", "build", "dist"] pyemd-0.5.1/dev_requirements.txt000066400000000000000000000000201323351540500167460ustar00rootroot00000000000000Cython >=0.20.2 pyemd-0.5.1/dist_requirements.txt000066400000000000000000000000361323351540500171420ustar00rootroot00000000000000docutils pygments twine wheel pyemd-0.5.1/pyemd/000077500000000000000000000000001323351540500137525ustar00rootroot00000000000000pyemd-0.5.1/pyemd/__about__.py000066400000000000000000000012461323351540500162350ustar00rootroot00000000000000#!/usr/bin/env python3 # -*- coding: utf-8 -*- # __about__.py """PyEMD metadata""" __title__ = 'pyemd' __version__ = '0.5.1' __description__ = ("A Python wrapper for Ofir Pele and Michael Werman's " "implementation of the Earth Mover's Distance.") __author__ = 'Will Mayner' __author_email__ = 'wmayner@gmail.com' __author_website__ = 'http://willmayner.com' __license__ = 'MIT' __copyright__ = 'Copyright (c) 2014-2017 Will Mayner' __url__ = 'http://github.com/wmayner/pyemd' __all__ = ['__title__', '__version__', '__description__', '__author__', '__author_email__', '__author_website__', '__license__', '__copyright__', '__url__'] pyemd-0.5.1/pyemd/__init__.py000066400000000000000000000044031323351540500160640ustar00rootroot00000000000000#!/usr/bin/env python3 # -*- coding: utf-8 -*- # __init__.py """ PyEMD ===== PyEMD is a Python wrapper for `Ofir Pele and Michael Werman's implementation of the Earth Mover's Distance `_ that allows it to be used with NumPy. 
**If you use this code, please cite the papers listed at the end of the README.** Use PyEMD like so: Usage ~~~~~ >>> from pyemd import emd >>> import numpy as np >>> first_signature = np.array([0.0, 1.0]) >>> second_signature = np.array([5.0, 3.0]) >>> distance_matrix = np.array([[0.0, 0.5], [0.5, 0.0]]) >>> emd(first_signature, second_signature, distance_matrix) 3.5 You can also get the associated minimum-cost flow: >>> from pyemd import emd_with_flow >>> emd_with_flow(first_signature, second_signature, distance_matrix) (3.5, [[0.0, 0.0], [0.0, 1.0]]) You can also calculate the EMD directly from two arrays of observations: >>> from pyemd import emd_samples >>> first_array = [1,2,3,4] >>> second_array = [2,3,4,5] >>> emd_samples(first_array, second_array, bins=2) 0.5 Limitations and Caveats ~~~~~~~~~~~~~~~~~~~~~~~ - ``distance_matrix`` must be symmetric. - ``distance_matrix`` is assumed to represent a true metric. This must be enforced by the caller. See the documentation in ``pyemd/lib/emd_hat.hpp``. - The flow matrix does not contain the flows to/from the extra mass bin. - The signatures and distance matrix must be numpy arrays of ``np.float64``. The original C++ template function can accept any numerical C++ type, but this wrapper only instantiates the template with ``double`` (Cython converts ``np.float64`` to ``double``). If there's demand, I can add support for other types. Credit ~~~~~~ - All credit for the actual algorithm and implementation goes to `Ofir Pele `_ and `Michael Werman `_. See the `relevant paper `_. - Thanks to the Cython developers for making this kind of wrapper relatively easy to write. :copyright: Copyright (c) 2014-2018 Will Mayner. :license: See the LICENSE file.
""" from .__about__ import * from .emd import emd, emd_with_flow, emd_samples pyemd-0.5.1/pyemd/emd.pyx000066400000000000000000000261621323351540500152700ustar00rootroot00000000000000#!/usr/bin/env python3 # -*- coding: utf-8 -*- # distutils: language = c++ # emd.pyx from libcpp.pair cimport pair from libcpp.vector cimport vector import cython # Import both NumPy and the Cython declarations for NumPy import numpy as np cimport numpy as np # Declare the interface to the C++ EMD library # ============================================ cdef extern from "lib/emd_hat.hpp": cdef double \ emd_hat_gd_metric_double(vector[double], vector[double], vector[vector[double]], double) except + cdef pair[double, vector[vector[double]]] \ emd_hat_gd_metric_double_with_flow_wrapper(vector[double], vector[double], vector[vector[double]], double) except + # Define the API # ============== DEFAULT_EXTRA_MASS_PENALTY = -1.0 def _validate_emd_input(first_histogram, second_histogram, distance_matrix): """Validate EMD input.""" if (first_histogram.shape[0] > distance_matrix.shape[0] or second_histogram.shape[0] > distance_matrix.shape[0]): raise ValueError('Histogram lengths cannot be greater than the ' 'number of rows or columns of the distance matrix') if (first_histogram.shape[0] != second_histogram.shape[0]): raise ValueError('Histogram lengths must be equal') def emd(np.ndarray[np.float64_t, ndim=1, mode="c"] first_histogram, np.ndarray[np.float64_t, ndim=1, mode="c"] second_histogram, np.ndarray[np.float64_t, ndim=2, mode="c"] distance_matrix, extra_mass_penalty=DEFAULT_EXTRA_MASS_PENALTY): u"""Return the EMD between two histograms using the given distance matrix. The Earth Mover's Distance is the minimal cost of turning one histogram into another by moving around the “dirt” in the bins, where the cost of moving dirt from one bin to another is given by the amount of dirt times the “ground distance” between the bins. 
Arguments: first_histogram (np.ndarray): A 1D array of type np.float64 of length N. second_histogram (np.ndarray): A 1D array of np.float64 of length N. distance_matrix (np.ndarray): A 2D array of np.float64, of size at least N × N. This defines the underlying metric, or ground distance, by giving the pairwise distances between the histogram bins. It must represent a metric; there is no warning if it doesn't. Keyword Arguments: extra_mass_penalty (float): The penalty for extra mass. If you want the resulting distance to be a metric, it should be at least half the diameter of the space (maximum possible distance between any two points). If you want partial matching you can set it to zero (but then the resulting distance is not guaranteed to be a metric). The default value is -1, which means the maximum value in the distance matrix is used. Returns: float: The EMD value. Raises: ValueError: If the length of either histogram is greater than the number of rows or columns of the distance matrix, or if the histograms aren't the same length. """ _validate_emd_input(first_histogram, second_histogram, distance_matrix) return emd_hat_gd_metric_double(first_histogram, second_histogram, distance_matrix, extra_mass_penalty) def emd_with_flow(np.ndarray[np.float64_t, ndim=1, mode="c"] first_histogram, np.ndarray[np.float64_t, ndim=1, mode="c"] second_histogram, np.ndarray[np.float64_t, ndim=2, mode="c"] distance_matrix, extra_mass_penalty=DEFAULT_EXTRA_MASS_PENALTY): u"""Return the EMD between two histograms using the given distance matrix. The Earth Mover's Distance is the minimal cost of turning one histogram into another by moving around the “dirt” in the bins, where the cost of moving dirt from one bin to another is given by the amount of dirt times the “ground distance” between the bins. Arguments: first_histogram (np.ndarray): A 1D array of type np.float64 of length N. second_histogram (np.ndarray): A 1D array of np.float64 of length N.
distance_matrix (np.ndarray): A 2D array of np.float64, of size at least N × N. This defines the underlying metric, or ground distance, by giving the pairwise distances between the histogram bins. It must represent a metric; there is no warning if it doesn't. Keyword Arguments: extra_mass_penalty (float): The penalty for extra mass. If you want the resulting distance to be a metric, it should be at least half the diameter of the space (maximum possible distance between any two points). If you want partial matching you can set it to zero (but then the resulting distance is not guaranteed to be a metric). The default value is -1, which means the maximum value in the distance matrix is used. Returns: (tuple(float, list(list(float)))): The EMD value and the associated minimum-cost flow. Raises: ValueError: If the length of either histogram is greater than the number of rows or columns of the distance matrix, or if the histograms aren't the same length. """ _validate_emd_input(first_histogram, second_histogram, distance_matrix) return emd_hat_gd_metric_double_with_flow_wrapper(first_histogram, second_histogram, distance_matrix, extra_mass_penalty) def euclidean_pairwise_distance_matrix(x): """Calculate the Euclidean pairwise distance matrix for a 1D array.""" distance_matrix = np.abs(np.repeat(x, len(x)) - np.tile(x, len(x))) return distance_matrix.reshape(len(x), len(x)) def emd_samples(first_array, second_array, extra_mass_penalty=DEFAULT_EXTRA_MASS_PENALTY, distance='euclidean', normalized=True, bins='auto', range=None): u"""Return the EMD between the histograms of two arrays. See ``emd()`` for more information about the EMD. Note: Pairwise ground distances are taken from the center of the bins. Arguments: first_array (Iterable): A 1D array of samples used to generate a histogram. second_array (Iterable): A 1D array of samples used to generate a histogram. Keyword Arguments: extra_mass_penalty (float): The penalty for extra mass. 
If you want the resulting distance to be a metric, it should be at least half the diameter of the space (maximum possible distance between any two points). If you want partial matching you can set it to zero (but then the resulting distance is not guaranteed to be a metric). The default value is -1, which means the maximum value in the distance matrix is used. distance (string or function): A string or function implementing a metric on a 1D ``np.ndarray``. Defaults to the Euclidean distance. Currently limited to 'euclidean' or your own function, which must take a 1D array and return a square 2D array of pairwise distances. normalized (boolean): If true (default), treat histograms as fractions of the dataset. If false, treat histograms as counts. In the latter case the EMD will vary greatly by array length. bins (int or string): The number of bins to include in the generated histogram. If a string, must be one of the bin selection algorithms accepted by ``np.histogram()``. Defaults to 'auto', which gives the maximum of the 'sturges' and 'fd' estimators. range (tuple(int, int)): The lower and upper range of the bins, passed to ``numpy.histogram()``. Defaults to the range of the union of ``first_array`` and ``second_array``. Note: if the given range is not a superset of the default range, no warning will be given. Returns: float: The EMD value between the histograms of ``first_array`` and ``second_array``.
""" first_array = np.array(first_array) second_array = np.array(second_array) # Validate arrays if not (first_array.size > 0 and second_array.size > 0): raise ValueError('Arrays of samples cannot be empty.') # Get the default range if range is None: range = (min(np.min(first_array), np.min(second_array)), max(np.max(first_array), np.max(second_array))) # Use automatic binning from `np.histogram()` # TODO: Use `np.histogram_bin_edges()` when it's available; # see https://github.com/numpy/numpy/issues/10183 if isinstance(bins, str): hist, _ = np.histogram(np.concatenate([first_array, second_array]), range=range, bins=bins) bins = len(hist) # Compute histograms first_histogram, bin_edges = np.histogram(first_array, range=range, bins=bins) second_histogram, _ = np.histogram(second_array, range=range, bins=bins) # Cast to C++ double first_histogram = first_histogram.astype(np.float64) second_histogram = second_histogram.astype(np.float64) # Normalize histograms to represent fraction of dataset in each bin if normalized: first_histogram = first_histogram / np.sum(first_histogram) second_histogram = second_histogram / np.sum(second_histogram) # Compute the distance matrix between the center of each bin bin_locations = np.mean([bin_edges[:-1], bin_edges[1:]], axis=0) if distance == 'euclidean': distance = euclidean_pairwise_distance_matrix distance_matrix = distance(bin_locations) # Validate distance matrix if len(distance_matrix) != len(distance_matrix[0]): raise ValueError( 'Distance matrix must be square; check your `distance` function.') if (first_histogram.shape[0] > len(distance_matrix) or second_histogram.shape[0] > len(distance_matrix)): raise ValueError( 'Distance matrix must have at least as many rows/columns as there ' 'are bins in the histograms; check your `distance` function.') # Return the EMD (no need to call the wrapper function, since this function # does its own validation, so we call the exposed C++ function directly) return
emd_hat_gd_metric_double(first_histogram, second_histogram, distance_matrix, extra_mass_penalty) pyemd-0.5.1/pyemd/lib/000077500000000000000000000000001323351540500145205ustar00rootroot00000000000000pyemd-0.5.1/pyemd/lib/EMD_DEFS.hpp000066400000000000000000000036401323351540500164420ustar00rootroot00000000000000#ifndef EMD_DEFS_HXX_ #define EMD_DEFS_HXX_ //------------------------------------------------------------------------------ // Should be integral and 0 should convert automatically to the type // Did not check if can be changed to other types. typedef int NODE_T; //------------------------------------------------------------------------------ #endif // Copyright (c) 2009-2012, Ofir Pele // All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of the The Hebrew University of Jerusalem nor the // names of its contributors may be used to endorse or promote products // derived from this software without specific prior written permission. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pyemd-0.5.1/pyemd/lib/emd_hat.hpp000066400000000000000000000140361323351540500166360ustar00rootroot00000000000000#ifndef EMD_HAT_HPP #define EMD_HAT_HPP #include #include #include "EMD_DEFS.hpp" #include "flow_utils.hpp" /// Fastest version of EMD. Also, in my experience metric ground distance yields better /// performance. /// /// Required params: /// P,Q - Two histograms of size N /// C - The NxN matrix of the ground distance between bins of P and Q. Must be a metric. I /// recommend it to be a thresholded metric (which is also a metric, see ICCV paper). /// /// Optional params: /// extra_mass_penalty - The penalty for extra mass - If you want the /// resulting distance to be a metric, it should be /// at least half the diameter of the space (maximum /// possible distance between any two points). If you /// want partial matching you can set it to zero (but /// then the resulting distance is not guaranteed to be a metric). /// Default value is -1 which means 1*max_element_in_C /// F - *F is filled with flows or nothing happens to F. See template param FLOW_TYPE. /// Note that EMD and EMD-HAT does not necessarily have a unique flow solution. /// We assume *F is already allocated and has enough space and is initialized to zeros. /// See also flow_utils.hpp file for flow-related utils. /// Default value: NULL and then FLOW_TYPE must be NO_FLOW. 
/// /// Required template params: /// NUM_T - the type of the histogram bins count (should be one of: int, long int, long long int, double) /// /// Optional template params: /// FLOW_TYPE == NO_FLOW - does nothing with the given F. /// == WITHOUT_TRANSHIPMENT_FLOW - fills F with the flows between bins connected /// with edges smaller than max(C). /// == WITHOUT_EXTRA_MASS_FLOW - fills F with the flows between all bins, except the flow /// to the extra mass bin. /// Note that if F is the default NULL then FLOW_TYPE must be NO_FLOW. template struct emd_hat_gd_metric { NUM_T operator()(const std::vector& P, const std::vector& Q, const std::vector< std::vector >& C, NUM_T extra_mass_penalty= -1, std::vector< std::vector >* F= NULL); }; /// Same as emd_hat_gd_metric, but does not assume metric property for the ground distance (C). /// Note that C should still be symmetric and non-negative! template struct emd_hat { NUM_T operator()(const std::vector& P, const std::vector& Q, const std::vector< std::vector >& C, NUM_T extra_mass_penalty= -1, std::vector< std::vector >* F= NULL); }; /// ========================================================================= /// 2014-02-27 - Added by Will Mayner /// ------------------------------------------------------------------------- /// Instantiate the template for importing into Cython emd_hat_gd_metric emd_hat_gd_metric_double; /// ========================================================================= /// ========================================================================= /// 2017-01-06 - Added by Will Mayner /// ------------------------------------------------------------------------- emd_hat_gd_metric emd_hat_gd_metric_double_with_flow; /// ========================================================================= /// ========================================================================= /// 2016-11-25 - Added by Rémi Louf /// ------------------------------------------------------------------------- /// Wrapper 
function to output the flow std::pair< double, std::vector > > emd_hat_gd_metric_double_with_flow_wrapper( const std::vector& P, const std::vector& Q, const std::vector >& C, double extra_mass_penalty) { std::vector > flow(P.size(), std::vector(P.size())); double emd = emd_hat_gd_metric_double_with_flow(P, Q, C, extra_mass_penalty, &flow); std::pair< double, std::vector > > results = std::make_pair(emd, flow); return results; } /// ========================================================================= #include "emd_hat_impl.hpp" #endif // Copyright (c) 2009-2012, Ofir Pele // All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of the The Hebrew University of Jerusalem nor the // names of its contributors may be used to endorse or promote products // derived from this software without specific prior written permission. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pyemd-0.5.1/pyemd/lib/emd_hat_impl.hpp000066400000000000000000000453371323351540500176670ustar00rootroot00000000000000#ifndef EMD_HAT_IMPL_HPP #define EMD_HAT_IMPL_HPP //======================================================================================= // Implementation stuff //======================================================================================= #include "min_cost_flow.hpp" #include #include #include #include #include template void fillFWithZeros(std::vector< std::vector >& F) { for (NODE_T i= 0; i struct emd_hat_impl; template NUM_T emd_hat_gd_metric::operator()(const std::vector& Pc, const std::vector& Qc, const std::vector< std::vector >& C, NUM_T extra_mass_penalty, std::vector< std::vector >* F) { if (FLOW_TYPE!=NO_FLOW) fillFWithZeros(*F); assert( (F!=NULL) || (FLOW_TYPE==NO_FLOW) ); std::vector P= Pc; std::vector Q= Qc; // Assuming metric property we can pre-flow 0-cost edges {for (NODE_T i=0; i()(Pc,Qc,P,Q,C,extra_mass_penalty,F); } // emd_hat_gd_metric template NUM_T emd_hat::operator()(const std::vector& P, const std::vector& Q, const std::vector< std::vector >& C, NUM_T extra_mass_penalty, std::vector< std::vector >* F) { if (FLOW_TYPE!=NO_FLOW) fillFWithZeros(*F); return emd_hat_impl()(P,Q,P,Q,C,extra_mass_penalty,F); } // emd_hat //----------------------------------------------------------------------------------------------- // Implementing it for different types 
//----------------------------------------------------------------------------------------------- // Blocking instantiation for a non-overloaded template param template struct emd_hat_impl { }; // emd_hat_impl //=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= // Main implementation //=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= template struct emd_hat_impl_integral_types { NUM_T operator()( const std::vector& POrig, const std::vector& QOrig, const std::vector& Pc, const std::vector& Qc, const std::vector< std::vector >& Cc, NUM_T extra_mass_penalty, std::vector< std::vector >* F) { //------------------------------------------------------- NODE_T N= Pc.size(); assert(Qc.size()==N); // Ensuring that the supplier - P, have more mass. std::vector P; std::vector Q; std::vector< std::vector > C(Cc); NUM_T abs_diff_sum_P_sum_Q; NUM_T sum_P= 0; NUM_T sum_Q= 0; {for (NODE_T i=0; isum_P) { needToSwapFlow= true; P= Qc; Q= Pc; // transpose C for (NODE_T i=0; i b(2*N+2); const NODE_T THRESHOLD_NODE= 2*N; const NODE_T ARTIFICIAL_NODE= 2*N+1; // need to be last ! 
{for (NODE_T i=0; i=0); if ( C[i][j]>maxC ) maxC= C[i][j]; }} }} if (extra_mass_penalty==-1) extra_mass_penalty= maxC; //------------------------------------------------------- //============================================================= std::set< NODE_T > sources_that_flow_not_only_to_thresh; std::set< NODE_T > sinks_that_get_flow_not_only_from_thresh; NUM_T pre_flow_cost= 0; //============================================================= //============================================================= // regular edges between sinks and sources without threshold edges std::vector< std::list< edge > > c(b.size()); {for (NODE_T i=0; i(j+N , C[i][j]) ); }} // j }}// i // checking which are not isolated {for (NODE_T i=0; i(THRESHOLD_NODE, 0) ); }} {for (NODE_T j=0; j(j+N, maxC) ); }} // artificial arcs - Note the restriction that only one edge i,j is artificial so I ignore it... {for (NODE_T i=0; i(ARTIFICIAL_NODE, maxC + 1 ) ); c[ARTIFICIAL_NODE].push_back( edge(i, maxC + 1 ) ); }} //============================================================= //==================================================== // remove nodes with supply demand of 0 // and vertexes that are connected only to the // threshold vertex //==================================================== NODE_T current_node_name= 0; // Note here it should be vector and not vector // as I'm using -1 as a special flag !!! 
const int REMOVE_NODE_FLAG= -1; std::vector nodes_new_names(b.size(),REMOVE_NODE_FLAG); std::vector nodes_old_names; nodes_old_names.reserve(b.size()); {for (NODE_T i=0; i=N) { // sink pre_flow_cost-= (b[i]*maxC); } b[THRESHOLD_NODE]+= b[i]; // add mass(i=N) } } }} //i nodes_new_names[THRESHOLD_NODE]= current_node_name; nodes_old_names.push_back(THRESHOLD_NODE); ++current_node_name; nodes_new_names[ARTIFICIAL_NODE]= current_node_name; nodes_old_names.push_back(ARTIFICIAL_NODE); ++current_node_name; std::vector bb(current_node_name); NODE_T j=0; {for (NODE_T i=0; i > > cc(bb.size()); {for (NODE_T i=0; i >::const_iterator it= c[i].begin(); it!=c[i].end(); ++it) { if ( nodes_new_names[it->_to]!=REMOVE_NODE_FLAG) { cc[ nodes_new_names[i] ].push_back( edge( nodes_new_names[it->_to], it->_cost ) ); } }} }} //==================================================== #ifndef NDEBUG NUM_T DEBUG_sum_bb= 0; for (NODE_T i=0; i mcf; NUM_T my_dist; std::vector< std::list< edge0 > > flows(bb.size()); //std::cout << bb.size() << std::endl; //std::cout << cc.size() << std::endl; //tictoc timer; //timer.tic(); NUM_T mcf_dist= mcf(bb,cc,flows); //timer.toc(); //std::cout << "min_cost_flow time== " << timer.totalTimeSec() << std::endl; if (FLOW_TYPE!=NO_FLOW) { for (NODE_T new_name_from=0; new_name_from >::const_iterator it= flows[new_name_from].begin(); it!=flows[new_name_from].end(); ++it) { if (new_name_from==nodes_new_names[THRESHOLD_NODE]||it->_to==nodes_new_names[THRESHOLD_NODE]) continue; NODE_T i,j; NUM_T flow= it->_flow; bool reverseEdge= it->_to_to]-N; } else { i= nodes_old_names[it->_to]; j= nodes_old_names[new_name_from]-N; } if (flow!=0&&new_name_from!=nodes_new_names[THRESHOLD_NODE]&&it->_to!=nodes_new_names[THRESHOLD_NODE]) { assert(i struct emd_hat_impl { typedef int NUM_T; NUM_T operator()( const std::vector& POrig, const std::vector& QOrig, const std::vector& P, const std::vector& Q, const std::vector< std::vector >& C, NUM_T extra_mass_penalty, std::vector< std::vector 
>* F) { return emd_hat_impl_integral_types()(POrig,QOrig,P,Q,C,extra_mass_penalty,F); } }; // emd_hat_impl template struct emd_hat_impl { typedef long int NUM_T; NUM_T operator()( const std::vector& POrig, const std::vector& QOrig, const std::vector& P, const std::vector& Q, const std::vector< std::vector >& C, NUM_T extra_mass_penalty, std::vector< std::vector >* F) { return emd_hat_impl_integral_types()(POrig,QOrig,P,Q,C,extra_mass_penalty,F); } }; // emd_hat_impl template struct emd_hat_impl { typedef long long int NUM_T; NUM_T operator()( const std::vector& POrig, const std::vector& QOrig, const std::vector& P, const std::vector& Q, const std::vector< std::vector >& C, NUM_T extra_mass_penalty, std::vector< std::vector >* F) { return emd_hat_impl_integral_types()(POrig,QOrig,P,Q,C,extra_mass_penalty,F); } }; // emd_hat_impl //---------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------- // floating types //---------------------------------------------------------------------------------------- template struct emd_hat_impl { typedef double NUM_T; typedef long long int CONVERT_TO_T; NUM_T operator()( const std::vector& POrig, const std::vector& QOrig, const std::vector& P, const std::vector& Q, const std::vector< std::vector >& C, NUM_T extra_mass_penalty, std::vector< std::vector >* F) { // TODO: static assert assert(sizeof(CONVERT_TO_T)>=8); // This condition should hold: // ( 2^(sizeof(CONVERT_TO_T*8)) >= ( MULT_FACTOR^2 ) // Note that it can be problematic to check it because // of overflow problems. I simply checked it with Linux calc // which has arbitrary precision. 
const double MULT_FACTOR= 1000000; // Constructing the input const NODE_T N= P.size(); std::vector iPOrig(N); std::vector iQOrig(N); std::vector iP(N); std::vector iQ(N); std::vector< std::vector > iC(N, std::vector(N) ); std::vector< std::vector > iF(N, std::vector(N) ); // Converting to CONVERT_TO_T double sumP= 0.0; double sumQ= 0.0; double maxC= C[0][0]; for (NODE_T i= 0; imaxC) maxC= C[i][j]; } } double minSum= std::min(sumP,sumQ); double maxSum= std::max(sumP,sumQ); double PQnormFactor= MULT_FACTOR/maxSum; double CnormFactor= MULT_FACTOR/maxC; for (NODE_T i= 0; i(floor(POrig[i]*PQnormFactor+0.5)); iQOrig[i]= static_cast(floor(QOrig[i]*PQnormFactor+0.5)); iP[i]= static_cast(floor(P[i]*PQnormFactor+0.5)); iQ[i]= static_cast(floor(Q[i]*PQnormFactor+0.5)); for (NODE_T j= 0; j(floor(C[i][j]*CnormFactor+0.5)); if (FLOW_TYPE!=NO_FLOW) { iF[i][j]= static_cast(floor(((*F)[i][j])*PQnormFactor+0.5)); } } } // computing distance without extra mass penalty double dist= emd_hat_impl()(iPOrig,iQOrig,iP,iQ,iC,0,&iF); // unnormalize dist= dist/PQnormFactor; dist= dist/CnormFactor; // adding extra mass penalty if (extra_mass_penalty==-1) extra_mass_penalty= maxC; dist+= (maxSum-minSum)*extra_mass_penalty; // converting flow to double if (FLOW_TYPE!=NO_FLOW) { for (NODE_T i= 0; i //---------------------------------------------------------------------------------------- #endif // Copyright (c) 2009-2012, Ofir Pele // All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. 
// * Neither the name of the The Hebrew University of Jerusalem nor the // names of its contributors may be used to endorse or promote products // derived from this software without specific prior written permission. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pyemd-0.5.1/pyemd/lib/emd_hat_signatures_interface.hpp000066400000000000000000000130201323351540500231120ustar00rootroot00000000000000#ifndef _EMD_HAT_SIGNATURE_INTERFACE_HXX #define _EMD_HAT_SIGNATURE_INTERFACE_HXX #include "EMD_DEFS.hpp" #include "emd_hat.hpp" //============================================================================= // This interface is similar to Rubner's interface. See: // http://www.cs.duke.edu/~tomasi/software/emd.htm // With the following changes; // 1. Weights of signature should be of type NUM_T (see emd_hat.hpp) // 2. Return value of the distance function (func) should be of type NUM_T // 3. Return value of the emd_hat_signature_interface function is NUM_T // 4. The function does not return a flow (I may add this in future, if needed) // 5. The function also gets the penalty for extra mass - if you want metric property // should be at least half the diameter of the space (maximum possible distance // between any two points). 
In Rubner's code this is implicitly 0. // 6. The result is not normalized with the flow. // // To get the same results as Rubner's code you should set extra_mass_penalty to 0, // and divide by the minimum of the sum of the two signature's weights. However, I // suggest not to do this as you lose the metric property and more importantly, in my // experience the performance is better with emd_hat. for more on the difference // between emd and emd_hat, see the paper: // A Linear Time Histogram Metric for Improved SIFT Matching // Ofir Pele, Michael Werman // ECCV 2008 // // To get shorter running time, set the ground distance function (func) to // be a thresholded distance. For example: min( L2, T ). Where T is some threshold. // Note that the running time is shorter with smaller T values. Note also that // thresholding the distance will probably increase accuracy. Finally, a thresholded // metric is also a metric. See paper: // Fast and Robust Earth Mover's Distances // Ofir Pele, Michael Werman // ICCV 2009 // // If you use this code, please cite the papers. //============================================================================= /*****************************************************************************/ /* feature_tt SHOULD BE MODIFIED BY THE USER TO REFLECT THE FEATURE TYPE */ typedef double feature_tt; /*****************************************************************************/ template struct signature_tt { int n; /* Number of features in the signature */ feature_tt* Features; /* Pointer to the features vector */ NUM_T* Weights; /* Pointer to the weights of the features (Changed from Rubner's)*/ }; /// Similar to Rubner's emd interface. /// extra_mass_penalty - it's alpha*maxD_ij in my ECCV paper. If you want metric property /// should be at least half the diameter of the space (maximum possible distance /// between any two points). In Rubner's code this is implicitly 0. 
/// Default value is -1 which means 1*max_distance_between_bins_of_signatures template NUM_T emd_hat_signature_interface(signature_tt* Signature1, signature_tt* Signature2, NUM_T (*func)(feature_tt*, feature_tt*), NUM_T extra_mass_penalty) { std::vector P(Signature1->n + Signature2->n , 0); std::vector Q(Signature1->n + Signature2->n , 0); for (int i=0; in; ++i) { P[i]= Signature1->Weights[i]; } for (int j=0; jn; ++j) { Q[j+Signature1->n]= Signature2->Weights[j]; } std::vector< std::vector > C(P.size(), std::vector(P.size(), 0) ); {for (int i=0; in; ++i) { {for (int j=0; jn; ++j) { NUM_T dist= func( (Signature1->Features+i) , (Signature2->Features+j) ); assert(dist>=0); C[i][j+Signature1->n]= dist; C[j+Signature1->n][i]= dist; }} }} return emd_hat()(P,Q,C, extra_mass_penalty); } // emd_hat_signature_interface #endif // Copyright (c) 2009-2012, Ofir Pele // All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of the The Hebrew University of Jerusalem nor the // names of its contributors may be used to endorse or promote products // derived from this software without specific prior written permission. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pyemd-0.5.1/pyemd/lib/flow_utils.hpp000066400000000000000000000103061323351540500174200ustar00rootroot00000000000000#ifndef FLOW_UTILS_HPP #define FLOW_UTILS_HPP #include "EMD_DEFS.hpp" #include #include enum FLOW_TYPE_T { NO_FLOW= 0, WITHOUT_TRANSHIPMENT_FLOW, WITHOUT_EXTRA_MASS_FLOW }; /// returns the flow from/to transhipment vertex given flow F which was computed using /// FLOW_TYPE_T of kind WITHOUT_TRANSHIPMENT_FLOW. template void return_flow_from_to_transhipment_vertex(const std::vector< std::vector >& F, const std::vector& P, const std::vector& Q, std::vector& flow_from_P_to_transhipment, std::vector& flow_from_transhipment_to_Q) { flow_from_P_to_transhipment= P; flow_from_transhipment_to_Q= Q; for (NODE_T i= 0; i void transform_flow_to_regular(std::vector< std::vector >& F, const std::vector& P, const std::vector& Q) { const NODE_T N= P.size(); std::vector flow_from_P_to_transhipment(N); std::vector flow_from_transhipment_to_Q(N); return_flow_from_to_transhipment_vertex(F,P,Q, flow_from_P_to_transhipment, flow_from_transhipment_to_Q); NODE_T i= 0; NODE_T j= 0; while( true ) { while (i #include #include #include #include #include "EMD_DEFS.hpp" //------------------------------------------------------------------------------ template struct edge { edge(NODE_T to, NUM_T cost) : _to(to), _cost(cost) {} NODE_T _to; NUM_T _cost; }; template struct edgeCompareByCost { bool operator()(const edge& a, const edge& b) { return a._cost struct 
edge0 { edge0(NODE_T to, NUM_T cost, NUM_T flow) : _to(to), _cost(cost), _flow(flow) {} NODE_T _to; NUM_T _cost; NUM_T _flow; }; template struct edge1 { edge1(NODE_T to, NUM_T reduced_cost) : _to(to), _reduced_cost(reduced_cost) {} NODE_T _to; NUM_T _reduced_cost; }; template struct edge2 { edge2(NODE_T to, NUM_T reduced_cost, NUM_T residual_capacity) : _to(to), _reduced_cost(reduced_cost), _residual_capacity(residual_capacity) {} NODE_T _to; NUM_T _reduced_cost; NUM_T _residual_capacity; }; template struct edge3 { edge3(NODE_T to=0, NUM_T dist=0) : _to(to), _dist(dist) {} NODE_T _to; NUM_T _dist; }; //------------------------------------------------------------------------------ //------------------------------------------------------------------------------ template class min_cost_flow { NODE_T _num_nodes; std::vector _nodes_to_Q; //tictoc tictoc_shortest_path; //tictoc tictoc_while_true; //tictoc tmp_tic_toc; //tictoc tictoc_all_function; public: // e - supply(positive) and demand(negative). // c[i] - edges that goes from node i. 
first is the second nod // x - the flow is returned in it NUM_T operator()(std::vector& e, const std::vector< std::list< edge > >& c, std::vector< std::list< edge0 > >& x) { //for (NODE_T i=0; i >::const_iterator it= c[from].begin(); it!=c[from].end(); ++it) { x[from].push_back( edge0 (it->_to, it->_cost, 0) ); x[it->_to].push_back( edge0 (from, -it->_cost,0) ); }} // it }} // from // reduced costs for forward edges (c[i,j]-pi[i]+pi[j]) // Note that for forward edges the residual capacity is infinity std::vector< std::list< edge1 > > r_cost_forward(_num_nodes); {for (NODE_T from=0; from<_num_nodes; ++from) { {for (typename std::list< edge >::const_iterator it= c[from].begin(); it!=c[from].end(); ++it) { r_cost_forward[from].push_back( edge1(it->_to,it->_cost) ); }} }} // reduced costs and capacity for backward edges (c[j,i]-pi[j]+pi[i]) // Since the flow at the beginning is 0, the residual capacity is also zero std::vector< std::list< edge2 > > r_cost_cap_backward(_num_nodes); {for (NODE_T from=0; from<_num_nodes; ++from) { {for (typename std::list< edge >::const_iterator it= c[from].begin(); it!=c[from].end(); ++it) { r_cost_cap_backward[ it->_to ].push_back( edge2(from,-it->_cost,0) ); }} // it }} // from // Max supply TODO:demand?, given U?, optimization-> min out of demand,supply NUM_T U= 0; {for (NODE_T i=0; i<_num_nodes; ++i) { if (e[i]>U) U= e[i]; }} NUM_T delta= static_cast(pow(2.0l,ceil(log(static_cast(U))/log(2.0)))); std::vector< NUM_T > d(_num_nodes); std::vector< NODE_T > prev(_num_nodes); delta= 1; //while (delta>=1) { // delta-scaling phase //cout << "delta==" << delta << endl; //tictoc_while_true.tic(); while (true) { //until we break when S or T is empty NUM_T maxSupply= 0; NODE_T k=0; for (NODE_T i=0; i<_num_nodes; ++i) { if (e[i]>0) { if (maxSupply >::iterator itccb= r_cost_cap_backward[from].begin(); while ( (itccb!=r_cost_cap_backward[from].end()) && (itccb->_to!=to) ) { ++itccb; } if (itccb!=r_cost_cap_backward[from].end()) { if 
(itccb->_residual_capacity_residual_capacity; } to= from; } while (to!=k); //--------------------------------------------------------------- //--------------------------------------------------------------- // augment delta flow from k to l (backwards actually...) to= l; do { NODE_T from= prev[to]; assert(from!=to); // TODO - might do here O(n) can be done in O(1) typename std::list< edge0 >::iterator itx= x[from].begin(); while (itx->_to!=to) { ++itx; } itx->_flow+= delta; // update residual for backward edges typename std::list< edge2 >::iterator itccb= r_cost_cap_backward[to].begin(); while ( (itccb!=r_cost_cap_backward[to].end()) && (itccb->_to!=from) ) { ++itccb; } if (itccb!=r_cost_cap_backward[to].end()) { itccb->_residual_capacity+= delta; } itccb= r_cost_cap_backward[from].begin(); while ( (itccb!=r_cost_cap_backward[from].end()) && (itccb->_to!=to) ) { ++itccb; } if (itccb!=r_cost_cap_backward[from].end()) { itccb->_residual_capacity-= delta; } // update e e[to]+= delta; e[from]-= delta; to= from; } while (to!=k); //--------------------------------------------------------------------------------- } // while true (until we break when S or T is empty) //tictoc_while_true.toc(); //cout << "while true== " << tictoc_while_true.totalTimeSec() << endl; //delta= delta/2; //} // (delta-scaling phase) // compute distance from x //cout << endl << endl; NUM_T dist= 0; {for (NODE_T from=0; from<_num_nodes; ++from) { {for (typename std::list< edge0 >::const_iterator it= x[from].begin(); it!=x[from].end(); ++it) { // if (it->_flow!=0) cout << from << "->" << it->_to << ": " << it->_flow << "x" << it->_cost << endl; dist+= (it->_cost*it->_flow); }} // it }} // from //tictoc_all_function.toc(); //cout << "operator() time==" << tictoc_all_function.totalTimeSec() << endl; //cout << "compute_shortest_path_time==" << tictoc_shortest_path.totalTimeSec() << endl; //cout << "tmp_tic_toc== " << tmp_tic_toc.totalTimeSec() << endl; return dist; } // operator() private: void 
compute_shortest_path(std::vector< NUM_T >& d, std::vector< NODE_T >& prev, NODE_T from, std::vector< std::list< edge1 > >& cost_forward, std::vector< std::list< edge2 > >& cost_backward, const std::vector& e, NODE_T& l) { //---------------------------------------------------------------- // Making heap (all inf except 0, so we are saving comparisons...) //---------------------------------------------------------------- std::vector< edge3 > Q(_num_nodes); Q[0]._to= from; _nodes_to_Q[from]= 0; Q[0]._dist= 0; NODE_T j=1; // TODO: both of these into a function? {for (NODE_T i=0; i::max(); ++j; }} {for (NODE_T i=from+1; i<_num_nodes; ++i) { Q[j]._to= i; _nodes_to_Q[i]= j; Q[j]._dist= std::numeric_limits::max(); ++j; }} //---------------------------------------------------------------- //---------------------------------------------------------------- // main loop //---------------------------------------------------------------- std::vector finalNodesFlg(_num_nodes, false); do { NODE_T u= Q[0]._to; d[u]= Q[0]._dist; // final distance finalNodesFlg[u]= true; if (e[u]<0) { l= u; break; } heap_remove_first(Q, _nodes_to_Q); // neighbors of u {for (typename std::list< edge1 >::const_iterator it= cost_forward[u].begin(); it!=cost_forward[u].end(); ++it) { assert (it->_reduced_cost>=0); NUM_T alt= d[u]+it->_reduced_cost; NODE_T v= it->_to; if ( (_nodes_to_Q[v] >::const_iterator it= cost_backward[u].begin(); it!=cost_backward[u].end(); ++it) { if (it->_residual_capacity>0) { assert (it->_reduced_cost>=0); NUM_T alt= d[u]+it->_reduced_cost; NODE_T v= it->_to; if ( (_nodes_to_Q[v] >::iterator it= cost_forward[from].begin(); it!=cost_forward[from].end(); ++it) { if (finalNodesFlg[from]) { it->_reduced_cost+= d[from] - d[l]; } if (finalNodesFlg[it->_to]) { it->_reduced_cost-= d[it->_to] - d[l]; } } } }} // reduced costs and capacity for backward edges (c[j,i]-pi[j]+pi[i]) {for (NODE_T from=0; from<_num_nodes; ++from) { { for (typename std::list< edge2 >::iterator it= 
cost_backward[from].begin(); it!=cost_backward[from].end(); ++it) { if (finalNodesFlg[from]) { it->_reduced_cost+= d[from] - d[l]; } if (finalNodesFlg[it->_to]) { it->_reduced_cost-= d[it->_to] - d[l]; } } }// it }} //--------------------------------------------------------------------------------- //tmp_tic_toc.toc(); //---------------------------------------------------------------- } // compute_shortest_path void heap_decrease_key(std::vector< edge3 >& Q, std::vector& nodes_to_Q, NODE_T v, NUM_T alt) { NODE_T i= nodes_to_Q[v]; Q[i]._dist= alt; while (i>0 && Q[PARENT(i)]._dist>Q[i]._dist) { swap_heap(Q, nodes_to_Q, i, PARENT(i)); i= PARENT(i); } } // heap_decrease_key void heap_remove_first(std::vector< edge3 >& Q, std::vector& nodes_to_Q) { swap_heap(Q, nodes_to_Q, 0, Q.size()-1); Q.pop_back(); heapify(Q,nodes_to_Q , 0); } // heap_remove_first void heapify(std::vector< edge3 >& Q, std::vector& nodes_to_Q, NODE_T i) { do { // TODO: change to loop NODE_T l= LEFT(i); NODE_T r= RIGHT(i); NODE_T smallest; if ( (l >& Q, std::vector& nodes_to_Q, NODE_T i, NODE_T j) { edge3 tmp= Q[i]; Q[i]= Q[j]; Q[j]= tmp; nodes_to_Q[ Q[j]._to ]= j; nodes_to_Q[ Q[i]._to ]= i; } // swap_heapify NODE_T LEFT(NODE_T i) { return 2*(i+1)-1; } NODE_T RIGHT(NODE_T i) { return 2*(i+1); // 2*(i+1)+1-1 } NODE_T PARENT(NODE_T i) { return (i-1)/2; } }; // end min_cost_flow #endif // Copyright (c) 2009-2012, Ofir Pele // All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. 
// * Neither the name of the The Hebrew University of Jerusalem nor the // names of its contributors may be used to endorse or promote products // derived from this software without specific prior written permission. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pyemd-0.5.1/pytest.ini000066400000000000000000000001261323351540500146640ustar00rootroot00000000000000[pytest] addopts = --color=yes --tb=auto --doctest-glob='*.rst' --doctest-modules -vv pyemd-0.5.1/requirements.txt000066400000000000000000000001171323351540500161170ustar00rootroot00000000000000-e . 
-r test_requirements.txt -r dev_requirements.txt -r dist_requirements.txt pyemd-0.5.1/setup.py000066400000000000000000000064061323351540500143540ustar00rootroot00000000000000#!/usr/bin/env python3 # -*- coding: utf-8 -*- import io import os import sys from warnings import warn from setuptools import Extension, setup from setuptools.command.build_ext import build_ext as _build_ext from setuptools.command.sdist import sdist as _sdist # Alias ModuleNotFound for Python <= 3.5 if (sys.version_info[0] < 3 or (sys.version_info[0] == 3 and sys.version_info[1] < 6)): ModuleNotFoundError = ImportError try: from Cython.Build import cythonize as _cythonize USE_CYTHON = True except (ImportError, ModuleNotFoundError): USE_CYTHON = False def cythonize(extensions, **_ignore): # Attempt to use Cython if USE_CYTHON: return _cythonize(extensions) # Cython is not available for extension in extensions: sources = [] for sfile in extension.sources: path, ext = os.path.splitext(sfile) if ext in ('.pyx', '.py'): if extension.language == 'c++': ext = '.cpp' else: ext = '.c' sfile = path + ext sources.append(sfile) extension.sources[:] = sources return extensions EXTENSIONS = [ Extension('pyemd.emd', sources=['pyemd/emd.pyx'], language="c++") ] EXT_MODULES = cythonize(EXTENSIONS) class sdist(_sdist): def run(self): # Make sure the compiled Cython files in the distribution are up-to-date if USE_CYTHON: _cythonize(EXTENSIONS) else: warn('\n\n\033[91m\033[1m WARNING: ' 'IF YOU A PREPARING A DISTRIBUTION: Cython is not available! ' 'The cythonized `*.cpp` files may be out of date. Please ' 'install Cython and run `sdist` again.' 
'\033[0m\n') _sdist.run(self) # See http://stackoverflow.com/a/21621689/1085344 class build_ext(_build_ext): def finalize_options(self): _build_ext.finalize_options(self) # Prevent numpy from thinking it is still in its setup process: if hasattr(__builtins__, '__NUMPY_SETUP__'): __builtins__.__NUMPY_SETUP__ = False import numpy self.include_dirs.append(numpy.get_include()) CMDCLASS = { 'sdist': sdist, 'build_ext': build_ext } with io.open('README.rst', encoding='utf-8') as f: README = f.read() ABOUT = {} with open('./pyemd/__about__.py') as f: exec(f.read(), ABOUT) REQUIRES = [ 'numpy >=1.9.0, <2.0.0' ] setup( name=ABOUT['__title__'], version=ABOUT['__version__'], description=ABOUT['__description__'], long_description=README, author=ABOUT['__author__'], author_email=ABOUT['__author_email__'], url=ABOUT['__url__'], license=ABOUT['__license__'], packages=['pyemd'], install_requires=REQUIRES, cmdclass=CMDCLASS, setup_requires=REQUIRES, ext_modules=EXT_MODULES, classifiers=[ 'Development Status :: 3 - Alpha', 'Intended Audience :: Developers', 'Natural Language :: English', 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', ], ) pyemd-0.5.1/test/000077500000000000000000000000001323351540500136135ustar00rootroot00000000000000pyemd-0.5.1/test/.pylintrc000066400000000000000000000012141323351540500154560ustar00rootroot00000000000000[MESSAGES CONTROL] disable = fixme, import-error, locally-disabled, locally-enabled, invalid-name, [REPORTS] # Tells whether to display a full report or only the messages # DEFAULT: reports=yes # RATIONALE: run from Travis / tox, and don't need / want to parse output. reports=no [BASIC] # Regular expression which should only match function or class names that do # not require a docstring. 
# DEFAULT: no-docstring-rgx=__.*__ no-docstring-rgx=(test_*|__.*__|main) # Minimum line length for functions/classes that require docstrings, shorter # ones are exempt. # DEFAULT: docstring-min-length=-1 docstring-min-length=10 pyemd-0.5.1/test/test_pyemd.py000066400000000000000000000273341323351540500163530ustar00rootroot00000000000000
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# test/test_pyemd.py

"""Tests for PyEMD"""

import numpy as np
import pytest

from pyemd import emd, emd_samples, emd_with_flow

# Number of decimal places kept when comparing EMD values and flow entries.
EMD_PRECISION = 5
FLOW_PRECISION = 4


def emd_assert(got, expected):
    """Assert that ``got`` rounded to ``EMD_PRECISION`` places equals
    ``expected``.
    """
    assert round(got, EMD_PRECISION) == expected


def emd_flow_assert(got, expected):
    """Assert that a ``(value, flow)`` pair matches the expected pair.

    The value is rounded to ``EMD_PRECISION`` places and the flow matrix is
    rounded element-wise to ``FLOW_PRECISION`` places before the comparison.
    """
    got_value, got_flow = got
    expected_value, expected_flow = expected
    assert round(got_value, EMD_PRECISION) == expected_value
    assert np.array_equal(np.round(got_flow, FLOW_PRECISION), expected_flow)


# `emd()`
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


def test_emd_1():
    first_signature = np.array([0.0, 1.0])
    second_signature = np.array([5.0, 3.0])
    distance_matrix = np.array([[0.0, 0.5],
                                [0.5, 0.0]])
    emd_assert(
        emd(first_signature, second_signature, distance_matrix),
        3.5
    )


def test_emd_2():
    # Identical signatures.
    first_signature = np.array([1.0, 1.0])
    second_signature = np.array([1.0, 1.0])
    distance_matrix = np.array([[0.0, 1.0],
                                [1.0, 0.0]])
    emd_assert(
        emd(first_signature, second_signature, distance_matrix),
        0.0
    )


def test_emd_3():
    # All-zero distance matrix.
    first_signature = np.array([6.0, 1.0])
    second_signature = np.array([1.0, 7.0])
    distance_matrix = np.array([[0.0, 0.0],
                                [0.0, 0.0]])
    emd_assert(
        emd(first_signature, second_signature, distance_matrix),
        0.0
    )


def test_emd_4():
    first_signature = np.array([1.0, 2.0, 1.0, 2.0])
    second_signature = np.array([2.0, 1.0, 2.0, 1.0])
    distance_matrix = np.array([[0.0, 1.0, 1.0, 2.0],
                                [1.0, 0.0, 2.0, 1.0],
                                [1.0, 2.0, 0.0, 1.0],
                                [2.0, 1.0, 1.0, 0.0]])
    emd_assert(
        emd(first_signature, second_signature, distance_matrix),
        2.0
    )


def test_emd_extra_mass_penalty():
    # The signatures sum to 5.0 and 6.0 respectively, so their total masses
    # differ; an explicit `extra_mass_penalty` is passed.
    first_signature = np.array([0.0, 2.0, 1.0, 2.0])
    second_signature = np.array([2.0, 1.0, 2.0, 1.0])
    distance_matrix = np.array([[0.0, 1.0, 1.0, 2.0],
                                [1.0, 0.0, 2.0, 1.0],
                                [1.0, 2.0, 0.0, 1.0],
                                [2.0, 1.0, 1.0, 0.0]])
    emd_assert(
        emd(first_signature, second_signature, distance_matrix,
            extra_mass_penalty=2.5),
        4.5
    )


# Validation


def test_emd_validate_larger_signatures_1():
    # Both signatures are longer than the distance matrix.
    first_signature = np.array([0.0, 1.0, 2.0])
    second_signature = np.array([5.0, 3.0, 3.0])
    distance_matrix = np.array([[0.0, 0.5],
                                [0.5, 0.0]])
    with pytest.raises(ValueError):
        emd(first_signature, second_signature, distance_matrix)


def test_emd_validate_larger_signatures_2():
    # Only the first signature is longer than the distance matrix.
    first_signature = np.array([0.0, 1.0, 2.0])
    second_signature = np.array([5.0, 3.0])
    distance_matrix = np.array([[0.0, 0.5],
                                [0.5, 0.0]])
    with pytest.raises(ValueError):
        # This test belongs to the `emd()` section, so it must call `emd`
        # (it previously called `emd_with_flow` by mistake).
        emd(first_signature, second_signature, distance_matrix)


def test_emd_validate_larger_signatures_3():
    # Only the second signature is longer than the distance matrix.
    first_signature = np.array([0.0, 1.0])
    second_signature = np.array([5.0, 3.0, 3.0])
    distance_matrix = np.array([[0.0, 0.5],
                                [0.5, 0.0]])
    with pytest.raises(ValueError):
        emd(first_signature, second_signature, distance_matrix)


def test_emd_validate_different_signature_dims():
    # Signatures of different lengths (2 vs. 3) with a 3x3 distance matrix.
    first_signature = np.array([0.0, 1.0])
    second_signature = np.array([5.0, 3.0, 3.0])
    distance_matrix = np.array([[0.0, 0.5, 0.0],
                                [0.5, 0.0, 0.0],
                                [0.5, 0.0, 0.0]])
    with pytest.raises(ValueError):
        emd(first_signature, second_signature, distance_matrix)


def test_emd_validate_symmetric_distance_matrix():
    # Despite the name, the input here is a ragged (non-rectangular) matrix.
    first_signature = np.array([0.0, 1.0])
    second_signature = np.array([5.0, 3.0])
    # `dtype=object` is required to build a ragged array on recent NumPy;
    # without it the constructor itself raises ValueError before `emd` is
    # ever called, outside the `pytest.raises` block.
    distance_matrix = np.array([[0.0, 0.5, 3.0],
                                [0.5, 0.0]], dtype=object)
    with pytest.raises(ValueError):
        emd(first_signature, second_signature, distance_matrix)


# `emd_with_flow()`
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


def test_emd_with_flow_1():
    first_signature = np.array([0.0, 1.0])
    second_signature = np.array([5.0, 3.0])
    distance_matrix = np.array([[0.0, 0.5],
                                [0.5, 0.0]])
    emd_flow_assert(
        emd_with_flow(first_signature, second_signature, distance_matrix),
        (3.5, [[0.0, 0.0],
               [0.0, 1.0]])
    )


def test_emd_with_flow_2():
    first_signature = np.array([1.0, 1.0])
    second_signature = np.array([1.0, 1.0])
    distance_matrix = np.array([[0.0, 1.0],
                                [1.0, 0.0]])
    emd_flow_assert(
        emd_with_flow(first_signature, second_signature, distance_matrix),
        (0.0, [[1.0, 0.0],
               [0.0, 1.0]])
    )


def test_emd_with_flow_3():
    first_signature = np.array([6.0, 1.0])
    second_signature = np.array([1.0, 7.0])
    distance_matrix = np.array([[0.0, 0.0],
                                [0.0, 0.0]])
    emd_flow_assert(
        emd_with_flow(first_signature, second_signature, distance_matrix),
        (0.0, [[1.0, 5.0],
               [0.0, 1.0]])
    )


def test_emd_with_flow_4():
    # Same inputs as `test_emd_with_flow_3` with the signatures reversed.
    first_signature = np.array([1.0, 7.0])
    second_signature = np.array([6.0, 1.0])
    distance_matrix = np.array([[0.0, 0.0],
                                [0.0, 0.0]])
    emd_flow_assert(
        emd_with_flow(first_signature, second_signature, distance_matrix),
        (0.0, [[1.0, 0.0],
               [5.0, 1.0]])
    )


def test_emd_with_flow_5():
    first_signature = np.array([3.0, 5.0])
    second_signature = np.array([6.0, 2.0])
    distance_matrix = np.array([[0.0, 0.0],
                                [0.0, 0.0]])
    emd_flow_assert(
        emd_with_flow(first_signature, second_signature, distance_matrix),
        (0.0, [[3.0, 0.0],
               [3.0, 2.0]])
    )


def test_emd_with_flow_6():
    first_signature = np.array([1.0, 2.0, 1.0, 2.0])
    second_signature = np.array([2.0, 1.0, 2.0, 1.0])
    distance_matrix = np.array([[0.0, 1.0, 1.0, 2.0],
                                [1.0, 0.0, 2.0, 1.0],
                                [1.0, 2.0, 0.0, 1.0],
                                [2.0, 1.0, 1.0, 0.0]])
    emd_flow_assert(
        emd_with_flow(first_signature, second_signature, distance_matrix),
        (2.0, [[1.0, 0.0, 0.0, 0.0],
               [1.0, 1.0, 0.0, 0.0],
               [0.0, 0.0, 1.0, 0.0],
               [0.0, 0.0, 1.0, 1.0]])
    )


def test_emd_with_flow_extra_mass_penalty():
    # Same inputs as `test_emd_extra_mass_penalty`, additionally checking
    # the returned flow matrix.
    first_signature = np.array([0.0, 2.0, 1.0, 2.0])
    second_signature = np.array([2.0, 1.0, 2.0, 1.0])
    distance_matrix = np.array([[0.0, 1.0, 1.0, 2.0],
                                [1.0, 0.0, 2.0, 1.0],
                                [1.0, 2.0, 0.0, 1.0],
                                [2.0, 1.0, 1.0, 0.0]])
    emd_flow_assert(
        emd_with_flow(first_signature, second_signature, distance_matrix,
                      extra_mass_penalty=2.5),
        (4.5, [[0.0, 0.0, 0.0, 0.0],
               [1.0, 1.0, 0.0, 0.0],
               [0.0, 0.0, 1.0, 0.0],
               [0.0, 0.0, 1.0, 1.0]])
    )


# Validation


def test_emd_with_flow_validate_larger_signatures_1():
    # Both signatures are longer than the distance matrix.
    first_signature = np.array([0.0, 1.0, 2.0])
    second_signature = np.array([5.0, 3.0, 3.0])
    distance_matrix = np.array([[0.0, 0.5],
                                [0.5, 0.0]])
    with pytest.raises(ValueError):
        emd_with_flow(first_signature, second_signature, distance_matrix)


def test_emd_with_flow_validate_larger_signatures_2():
    # Only the first signature is longer than the distance matrix.
    first_signature = np.array([0.0, 1.0, 2.0])
    second_signature = np.array([5.0, 3.0])
    distance_matrix = np.array([[0.0, 0.5],
                                [0.5, 0.0]])
    with pytest.raises(ValueError):
        # This test belongs to the `emd_with_flow()` section, so it must
        # call `emd_with_flow` (it previously called `emd` by mistake).
        emd_with_flow(first_signature, second_signature, distance_matrix)


def test_emd_with_flow_validate_larger_signatures_3():
    # Only the second signature is longer than the distance matrix.
    first_signature = np.array([0.0, 1.0])
    second_signature = np.array([5.0, 3.0, 3.0])
    distance_matrix = np.array([[0.0, 0.5],
                                [0.5, 0.0]])
    with pytest.raises(ValueError):
        emd_with_flow(first_signature, second_signature, distance_matrix)


def test_emd_with_flow_validate_different_signature_dims():
    # Signatures of different lengths (2 vs. 3) with a 3x3 distance matrix.
    first_signature = np.array([0.0, 1.0])
    second_signature = np.array([5.0, 3.0, 3.0])
    distance_matrix = np.array([[0.0, 0.5, 0.0],
                                [0.5, 0.0, 0.0],
                                [0.5, 0.0, 0.0]])
    with pytest.raises(ValueError):
        emd_with_flow(first_signature, second_signature, distance_matrix)


def test_emd_with_flow_validate_square_distance_matrix():
    first_signature = np.array([0.0, 1.0])
    second_signature = np.array([5.0, 3.0])
    # `dtype=object` is required to build a ragged array on recent NumPy;
    # without it the constructor itself raises ValueError before
    # `emd_with_flow` is ever called, outside the `pytest.raises` block.
    distance_matrix = np.array([[0.0, 0.5, 3.0],
                                [0.5, 0.0]], dtype=object)
    with pytest.raises(ValueError):
        emd_with_flow(first_signature, second_signature, distance_matrix)


# `emd_samples()`
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


def test_emd_samples_1():
    first_array = [1, 2, 3, 4]
    second_array = [2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array), 0.75)


def test_emd_samples_1_binsize():
    first_array = [1, 2, 3, 4]
    second_array = [2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array, bins=2), 0.5)


def test_emd_samples_1_manual_range():
    first_array = [1, 2, 3, 4]
    second_array = [2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array, range=(0, 10)), 1.0)


def test_emd_samples_1_not_normalized():
    first_array = [1, 2, 3, 4]
    second_array = [2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array, normalized=False), 3.0)


def test_emd_samples_1_custom_distance():
    def dist(x):
        # 0.0 on the diagonal (equal entries), 1.0 everywhere else.
        return np.array([[0.0 if i == j else 1.0 for i in x] for j in x])

    first_array = [1, 2, 3, 4]
    second_array = [2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array, distance=dist), 0.25)


def test_emd_samples_all_kwargs():
    # Regression only; not checked by hand
    def dist(x):
        # Built from positional indices, not from the values in `x`.
        return [
            [(i - j)**3 for i in range(len(x))]
            for j in range(len(x))
        ]

    first_array = [1, 2, 3, 4, 5]
    second_array = [2, 3, 4, 5]
    emd_assert(
        emd_samples(first_array, second_array,
                    bins=30,
                    normalized=False,
                    range=(-5, 15),
                    distance=dist),
        24389.0
    )


def test_emd_samples_2():
    first_array = [1]
    second_array = [2]
    emd_assert(emd_samples(first_array, second_array), 0.5)


def test_emd_samples_3():
    first_array = [1, 1, 1, 2, 3]
    second_array = [1, 2, 2, 2, 3]
    emd_assert(emd_samples(first_array, second_array), 0.32)


def test_emd_samples_4():
    first_array = [1, 2, 3, 4, 5]
    second_array = [99, 98, 97, 96, 95]
    emd_assert(emd_samples(first_array, second_array), 78.4)


def test_emd_samples_5():
    # Sample arrays of different lengths.
    first_array = [1]
    second_array = [1, 2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array), 1.8)


# Validation


def test_emd_samples_validate_empty():
    first_array = []
    second_array = [1]
    with pytest.raises(ValueError):
        emd_samples(first_array, second_array)


def test_emd_samples_validate_distance_matrix_square():
    def dist(x):
        # Always a single 1x3 row, i.e. never a square matrix.
        return [[1, 2, 3]]

    first_array = [1, 2, 3]
    second_array = [1, 2, 3]
    with pytest.raises(ValueError):
        emd_samples(first_array, second_array, distance=dist)


def test_emd_samples_validate_distance_matrix_size():
    def dist(x):
        # Always a fixed 2x2 matrix, whatever the input.
        return [[0, 1], [1, 0]]

    first_array = [1, 2, 3, 4]
    second_array = [1, 2, 3, 4]
    with pytest.raises(ValueError):
        emd_samples(first_array, second_array, distance=dist)


# Trailing archive residue (headers and contents of the following archive
# members), kept verbatim as a comment:
# pyemd-0.5.1/test_requirements.txt000066400000000000000000000000131323351540500171510ustar00rootroot00000000000000pytest tox pyemd-0.5.1/tox.ini000066400000000000000000000002061323351540500141450ustar00rootroot00000000000000[tox] envlist = py{27,34,35,36} [testenv] deps = -r{toxinidir}/test_requirements.txt commands = make test whitelist_externals = make