pax_global_header00006660000000000000000000000064140160301660014507gustar00rootroot0000000000000052 comment=19113b0a3496fed45bdf75740ff47f35bd420271 hdmedians-0.14.2/000077500000000000000000000000001401603016600135275ustar00rootroot00000000000000hdmedians-0.14.2/.gitignore000066400000000000000000000020441401603016600155170ustar00rootroot00000000000000.DS_Store # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so *.c # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # IPython Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # dotenv .env # virtualenv venv/ ENV/ # Spyder project settings .spyderproject # Rope project settings .ropeproject hdmedians-0.14.2/LICENSE000066400000000000000000000261351401603016600145430ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. hdmedians-0.14.2/Makefile000066400000000000000000000016601401603016600151720ustar00rootroot00000000000000 inplace: python3 setup.py build_ext -i test: inplace nosetests clean: @rm -fr build dist @rm -fr hdmedians/*.so @rm -fr hdmedians/*.c @rm -fr hdmedians.egg-info @rm -fr hdmedians/__pycache__ doc: docs/README_.md docs/plots.py #-- requires `pip3 install readme2tex cairosvg` @python3 -m readme2tex --output README.md --svgdir docs --project hdmedians --usepackage "stix" --rerender docs/README_.md @python3 docs/plots.py #-- hack to make images work @for f in $(wildcard docs/*.svg); do cairosvg -d 300 $$f -o $${f/svg/png}; done @sed -i~ -e 's/svg/png/g; s/rawgit/github/g; s/master/raw\\\/master/g' README.md @rm -fr *~ git rm --ignore-unmatch --cached $(wildcard docs/*.svg) $(wildcard docs/*.png) git add $(wildcard docs/*.svg) $(wildcard docs/*.png) git add README.md docs/README_.md git commit -m 'Update README' git push sdist: @rm -fr dist/ python3 setup.py sdist upload: python3 setup.py sdist register upload hdmedians-0.14.2/README.md000066400000000000000000000150341401603016600150110ustar00rootroot00000000000000# Hdmedians Did you know there is no unique way to mathematically extend the concept of a [median](https://en.wikipedia.org/wiki/Median) to higher dimensions? Various definitions for a **high-dimensional median** exist and this Python package provides a number of fast implementations of these definitions. Medians are extremely useful due to their high breakdown point (up to 50% contamination) and have a number of nice applications in machine learning, computer vision, and high-dimensional statistics.

This package currently has implementations of [medoid](#medoid) and [geometric median](#geometric-median) with support for missing data using `NaN`. ### Installation The latest version of the package is always available on [pypi](https://pypi.python.org/pypi/hdmedians), so can be easily installed by typing: ```{sh} pip3 install hdmedians ``` ## Medoid Given a finite set of -dimensional observation vectors , the [medoid](https://en.wikipedia.org/wiki/Medoid) of these observations is given by

The current implementation of `medoid` is in vectorized Python and can handle any data type supported by [ndarray](https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html). If you would like the algorithm to take care of missing values encoded as `nan` then you can use the `nanmedoid` function. ### Examples Create an 6 x 10 array of random integer observations. ```{python} >>> import numpy as np >>> X = np.random.randint(100, size=(6, 10)) array([[12, 9, 61, 76, 2, 17, 12, 11, 26, 0], [65, 72, 7, 64, 21, 92, 51, 48, 9, 65], [39, 7, 50, 56, 29, 79, 47, 45, 10, 52], [70, 12, 23, 97, 86, 14, 42, 90, 15, 16], [13, 7, 2, 47, 80, 53, 23, 59, 7, 15], [83, 2, 40, 12, 22, 75, 69, 61, 28, 53]]) ``` Find the medoid, taking the last axis as the number of observations. ```{python} >>> import hdmedians as hd >>> hd.medoid(X) array([12, 51, 47, 42, 23, 69]) ``` Take the first axis as the number of observations. ```{python} >>> hd.medoid(X, axis=0) array([39, 7, 50, 56, 29, 79, 47, 45, 10, 52]) ``` Since the medoid is one of the observations, the `medoid` function has the ability to only return the index if required. ```{python} >>> hd.medoid(X, indexonly=True) 6 >>> X[:,6] array([12, 51, 47, 42, 23, 69]) ``` ## Geometric Median The [geometric median](https://en.wikipedia.org/wiki/Geometric_median) is also known as the 1-median, spatial median, Euclidean minisum, or Torricelli point. Given a finite set of -dimensional observation vectors , the geometric median of these observations is given by

Note there is a subtle difference between the definition of the geometric median and the medoid: the search space for the solution differs and has the effect that the medoid returns one of the true observations whereas the geometric median can be described as a synthetic (not physically observed) observation. The current implementation of `geomedian` uses Cython and can handle `float64` or `float32`. If you would like the algorithm to take care of missing values encoded as `nan` then you can use the `nangeomedian` function. ### Examples Create an 6 x 10 array of random `float64` observations. ```{python} >>> import numpy as np >>> np.set_printoptions(precision=4, linewidth=200) >>> X = np.random.normal(1, size=(6, 10)) array([[ 1.1079, 0.5763, 0.3072, 1.2205, 0.8596, -1.5082, 2.5955, 2.8251, 1.5908, 0.4575], [ 1.555 , 1.7903, 1.213 , 1.1285, 0.0461, -0.4929, -0.1158, 0.5879, 1.5807, 0.5828], [ 2.1583, 3.4429, 0.4166, 1.0192, 0.8308, -0.1468, 2.6329, 2.2239, 0.2168, 0.8783], [ 0.7382, 1.9453, 0.567 , 0.6797, 1.1654, -0.1556, 0.9934, 0.1857, 1.369 , 2.1855], [ 0.1727, 0.0835, 0.5416, 1.4416, 1.6921, 1.6636, 1.6421, 1.0687, 0.6075, -0.0301], [ 2.6654, 1.6741, 1.1568, 1.3092, 1.6944, 0.2574, 2.8604, 1.6102, 0.4301, -0.3876]]) >>> X.dtype dtype('float64') ``` Find the geometric median, taking the last axis as the number of observations. ```{python} >>> import hdmedians as hd >>> np.array(hd.geomedian(X)) array([ 1.0733, 0.8974, 1.1935, 0.9122, 0.9975, 1.3422]) ``` Take the first axis as the number of observations. ```{python} >>> np.array(hd.geomedian(X, axis=0)) array([ 1.4581, 1.6377, 0.7147, 1.1257, 1.0493, -0.091 , 1.7907, 1.4168, 0.9587, 0.6195]) ``` Convert to `float32` and compute the geometric median. ```{python} >>> X = X.astype(np.float32) >>> m = hd.geomedian(X) ``` ## References * Small, C. G. (1990). [A survey of multidimensional medians](http://www.jstor.org/stable/1403809). *International Statistical Review/Revue Internationale de Statistique*, 263-277. hdmedians-0.14.2/docs/000077500000000000000000000000001401603016600144575ustar00rootroot00000000000000hdmedians-0.14.2/docs/16af742697014fc9e160526d074702c1.svg000066400000000000000000000030371401603016600211770ustar00rootroot00000000000000 hdmedians-0.14.2/docs/1f2a72531e5a196233138861065999e5.svg000066400000000000000000000322271401603016600210570ustar00rootroot00000000000000 hdmedians-0.14.2/docs/24a6cad3853187faa18a0cf58c6515c8.svg000066400000000000000000000377711401603016600215750ustar00rootroot00000000000000 hdmedians-0.14.2/docs/273457f251a6f8920e7b6c485c28b74f.svg000066400000000000000000000051751401603016600213710ustar00rootroot00000000000000 hdmedians-0.14.2/docs/2ec6e630f199f589a2402fdf3e0289d5.svg000066400000000000000000000050111401603016600215140ustar00rootroot00000000000000 hdmedians-0.14.2/docs/3701f7a72ee07d118cb3f467e211f9dd.svg000066400000000000000000000504421401603016600215640ustar00rootroot00000000000000 hdmedians-0.14.2/docs/4180412801f6665b049a3d631c19f4ef.svg000066400000000000000000003224211401603016600212600ustar00rootroot00000000000000 hdmedians-0.14.2/docs/4e63f132ed775dce27463f21d12530ad.svg000066400000000000000000000376771401603016600215140ustar00rootroot00000000000000 hdmedians-0.14.2/docs/75f7da71476ed18018bb65dc5103af7f.svg000066400000000000000000000112021401603016600215600ustar00rootroot00000000000000 hdmedians-0.14.2/docs/8ce46e21b12b0c15b3683b17029ce564.svg000066400000000000000000000166621401603016600214160ustar00rootroot00000000000000 hdmedians-0.14.2/docs/8f4b5485b718ef52ec71fce98d4342b0.svg000066400000000000000000000667501401603016600216150ustar00rootroot00000000000000 hdmedians-0.14.2/docs/95da31970158bdbe88c4b5eeeb7a01af.svg000066400000000000000000000422271401603016600220070ustar00rootroot00000000000000 hdmedians-0.14.2/docs/97c2c0ac5d7c079601abd56a54c9475c.svg000066400000000000000000000036111401603016600215610ustar00rootroot00000000000000 hdmedians-0.14.2/docs/9a69db764ef42f26b8985aeca3d39ba9.svg000066400000000000000000000341771401603016600217630ustar00rootroot00000000000000 hdmedians-0.14.2/docs/README_.md000066400000000000000000000121301401603016600160720ustar00rootroot00000000000000# Hdmedians Did you know there is no unique way to mathematically extend the concept of a [median](https://en.wikipedia.org/wiki/Median) to higher dimensions? Various definitions for a **high-dimensional median** exist and this Python package provides a number of fast implementations of these definitions. Medians are extremely useful due to their high breakdown point (up to 50% contamination) and have a number of nice applications in machine learning, computer vision, and high-dimensional statistics.

This package currently has implementations of [medoid](#medoid) and [geometric median](#geometric-median) with support for missing data using `NaN`. ### Installation The latest version of the package is always available on [pypi](https://pypi.python.org/pypi/hdmedians), so can be easily installed by typing: ```{sh} pip3 install hdmedians ``` ## Medoid Given a finite set $\mathbb{X}$ of $p$-dimensional observation vectors $\mathbb{X}=\{\mathbf{x}_1, \ldots, \mathbf{x}_n\}$, the [medoid](https://en.wikipedia.org/wiki/Medoid) $\mathbf{m}$ of these observations is given by $$ \mathbf{m} := \operatorname{argmin}_{\mathbf{x} \in \mathbb{X}} \sum_{i=1}^n \|\mathbf{x} - \mathbf{x}_i\|. $$ The current implementation of `medoid` is in vectorized Python and can handle any data type supported by [ndarray](https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html). If you would like the algorithm to take care of missing values encoded as `nan` then you can use the `nanmedoid` function. ### Examples Create an 6 x 10 array of random integer observations. ```{python} >>> import numpy as np >>> X = np.random.randint(100, size=(6, 10)) array([[12, 9, 61, 76, 2, 17, 12, 11, 26, 0], [65, 72, 7, 64, 21, 92, 51, 48, 9, 65], [39, 7, 50, 56, 29, 79, 47, 45, 10, 52], [70, 12, 23, 97, 86, 14, 42, 90, 15, 16], [13, 7, 2, 47, 80, 53, 23, 59, 7, 15], [83, 2, 40, 12, 22, 75, 69, 61, 28, 53]]) ``` Find the medoid, taking the last axis as the number of observations. ```{python} >>> import hdmedians as hd >>> hd.medoid(X) array([12, 51, 47, 42, 23, 69]) ``` Take the first axis as the number of observations. ```{python} >>> hd.medoid(X, axis=0) array([39, 7, 50, 56, 29, 79, 47, 45, 10, 52]) ``` Since the medoid is one of the observations, the `medoid` function has the ability to only return the index if required. ```{python} >>> hd.medoid(X, indexonly=True) 6 >>> X[:,6] array([12, 51, 47, 42, 23, 69]) ``` ## Geometric Median The [geometric median](https://en.wikipedia.org/wiki/Geometric_median) is also known as the 1-median, spatial median, Euclidean minisum, or Torricelli point. Given a finite set $\mathbb{X}$ of $p$-dimensional observation vectors $\mathbb{X}=\{\mathbf{x}_1, \ldots, \mathbf{x}_n\}$, the geometric median $\hat{\mu}$ of these observations is given by $$ \hat \mu := \operatorname{argmin}_{\mathbf{x} \in \mathbb{R}^p} \sum_{i=1}^n \|\mathbf{x} - \mathbf{x}_i\|. $$ Note there is a subtle difference between the definition of the geometric median and the medoid: the search space for the solution differs and has the effect that the medoid returns one of the true observations whereas the geometric median can be described as a synthetic (not physically observed) observation. The current implementation of `geomedian` uses Cython and can handle `float64` or `float32`. If you would like the algorithm to take care of missing values encoded as `nan` then you can use the `nangeomedian` function. ### Examples Create an 6 x 10 array of random `float64` observations. ```{python} >>> import numpy as np >>> np.set_printoptions(precision=4, linewidth=200) >>> X = np.random.normal(1, size=(6, 10)) array([[ 1.1079, 0.5763, 0.3072, 1.2205, 0.8596, -1.5082, 2.5955, 2.8251, 1.5908, 0.4575], [ 1.555 , 1.7903, 1.213 , 1.1285, 0.0461, -0.4929, -0.1158, 0.5879, 1.5807, 0.5828], [ 2.1583, 3.4429, 0.4166, 1.0192, 0.8308, -0.1468, 2.6329, 2.2239, 0.2168, 0.8783], [ 0.7382, 1.9453, 0.567 , 0.6797, 1.1654, -0.1556, 0.9934, 0.1857, 1.369 , 2.1855], [ 0.1727, 0.0835, 0.5416, 1.4416, 1.6921, 1.6636, 1.6421, 1.0687, 0.6075, -0.0301], [ 2.6654, 1.6741, 1.1568, 1.3092, 1.6944, 0.2574, 2.8604, 1.6102, 0.4301, -0.3876]]) >>> X.dtype dtype('float64') ``` Find the geometric median, taking the last axis as the number of observations. ```{python} >>> import hdmedians as hd >>> np.array(hd.geomedian(X)) array([ 1.0733, 0.8974, 1.1935, 0.9122, 0.9975, 1.3422]) ``` Take the first axis as the number of observations. ```{python} >>> np.array(hd.geomedian(X, axis=0)) array([ 1.4581, 1.6377, 0.7147, 1.1257, 1.0493, -0.091 , 1.7907, 1.4168, 0.9587, 0.6195]) ``` Convert to `float32` and compute the geometric median. ```{python} >>> X = X.astype(np.float32) >>> m = hd.geomedian(X) ``` ## References * Small, C. G. (1990). [A survey of multidimensional medians](http://www.jstor.org/stable/1403809). *International Statistical Review/Revue Internationale de Statistique*, 263-277. hdmedians-0.14.2/docs/af3d250893976cd65ed71ec1c3590423.svg000066400000000000000000000112711401603016600214310ustar00rootroot00000000000000 hdmedians-0.14.2/docs/cc257b70884a684ce6ddee390d176508.svg000066400000000000000000000343441401603016600215260ustar00rootroot00000000000000 hdmedians-0.14.2/docs/d860dc5ef8ee9b2127def8a8ed2ddebd.svg000066400000000000000000000371521401603016600223270ustar00rootroot00000000000000 hdmedians-0.14.2/docs/e2ab5aaffe776fde1073a90f83f75a77.svg000066400000000000000000000347211401603016600220220ustar00rootroot00000000000000 hdmedians-0.14.2/docs/e434f6de38c995d9cdab973d767d796a.svg000066400000000000000000000532121401603016600217110ustar00rootroot00000000000000 hdmedians-0.14.2/docs/e9e392f5b0eee55a2e294bf737406c2f.svg000066400000000000000000000061131401603016600216540ustar00rootroot00000000000000 hdmedians-0.14.2/docs/ec16505f0720df975846ae08cf9bbd39.svg000066400000000000000000000465121401603016600216040ustar00rootroot00000000000000 hdmedians-0.14.2/docs/f72bcdd87e43d7d06e1fe084f5255519.svg000066400000000000000000000377441401603016600216150ustar00rootroot00000000000000 hdmedians-0.14.2/docs/fb2c407771af04095047a75aab1127e2.svg000066400000000000000000000052011401603016600213730ustar00rootroot00000000000000 hdmedians-0.14.2/docs/fb31cf585f23aa9aadb4bd16aa2d71f8.svg000066400000000000000000000065241401603016600221320ustar00rootroot00000000000000 hdmedians-0.14.2/docs/fig1.svg000066400000000000000000001251571401603016600160410ustar00rootroot00000000000000 hdmedians-0.14.2/docs/plots.py000066400000000000000000000027471401603016600162040ustar00rootroot00000000000000import numpy as np import hdmedians as hd import matplotlib.pyplot as plt def pcoord(X, c=None): n, p = X.shape dims = range(1, p+1) for obs in X: plt.plot(dims, obs, c=c) from sklearn import datasets iris = datasets.load_iris() X = iris.data y = iris.target X1 = X[y==1] plt.figure(figsize=(10,3), dpi=200) pcoord(X1, c='#aaaaaa') md = hd.medoid(X1, axis=0) gm = hd.geomedian(X1, axis=0) xx = np.arange(X.shape[1])+1 plt.plot(xx, md, c='m', ls='--', lw=2, label='Medoid') plt.plot(xx, gm, c='r', ls='-', lw=2, label='Geometric Median') plt.xticks(xx, iris.feature_names) plt.title('Iris data set (' + iris.target_names[1].title() + ' class)') plt.grid(color='k', ls=':', axis='x') plt.legend(framealpha=1.0) plt.savefig('docs/fig1.svg') # n, p = (40, 20) # loc1 = np.random.normal(1, 2.0, size=(p,)) # loc2 = loc1 + np.random.normal(1.0, 1.0, size=(p,)) # sd = np.random.uniform(0.1, 0.2, size=(p,)) # X1 = np.random.normal(loc=loc1, scale=sd, size=(n, p)) # X2 = np.random.normal(loc=loc2, scale=sd, size=(n, p)) # X = np.vstack([X1,X2]) # plt.figure(figsize=(8,4)) # pcoord(X2, c='#aaaaaa') # md = hd.medoid(X2, axis=0) # gm = hd.geomedian(X2, axis=0) # plt.plot(range(1, X2.shape[1]+1), md, c='m', ls='--', lw=2) # plt.plot(range(1, X2.shape[1]+1), gm, c='r', ls='--', lw=2) # plt.savefig('docs/fig1.png') # plt.figure(figsize=(8,4)) # pcoord(X1, c='#aaaaaa') # pcoord(X2, c='#555555') # m = hd.geomedian(X, axis=0) # plt.plot(range(1, 7), m, c='m') # plt.savefig('docs/fig2.png') hdmedians-0.14.2/hdmedians/000077500000000000000000000000001401603016600154635ustar00rootroot00000000000000hdmedians-0.14.2/hdmedians/.pylintrc000077700000000000000000000000001401603016600214032../.pylintrcustar00rootroot00000000000000hdmedians-0.14.2/hdmedians/__init__.py000066400000000000000000000002731401603016600175760ustar00rootroot00000000000000# Copyright (C) 2016-2017 Dale Roberts - All Rights Reserved from __future__ import absolute_import from .medoid import medoid, nanmedoid from .geomedian import geomedian, nangeomedian hdmedians-0.14.2/hdmedians/geomedian.pyx000066400000000000000000000275471401603016600201740ustar00rootroot00000000000000# Copyright (C) 2017 Dale Roberts - All Rights Reserved # cython: cdivision=True # cython: boundscheck=False # cython: nonecheck=False # cython: wraparound=False import numpy as np import warnings from libc.math cimport isnan, sqrt, acos, fabs cimport numpy as cnp ctypedef fused floating: cnp.float32_t cnp.float64_t ctypedef cnp.float32_t float32_t ctypedef cnp.float64_t float64_t cdef floating dot(floating[:] x, floating[:] y) nogil: cdef size_t n = x.shape[0] cdef size_t i = 0 cdef float64_t result = 0. for i in range(n): result += x[i] * y[i] return result cdef floating sum(floating[:] x) nogil: cdef size_t n = x.shape[0] cdef float64_t total = 0. for i in range(n): total += x[i] return total cdef floating nansum(floating[:] x) nogil: cdef size_t n = x.shape[0] cdef float64_t total = 0. for i in range(n): if not isnan(x[i]): total += x[i] return total cdef floating dist_naneuclidean(floating[:] x, floating[:] y) nogil: cdef size_t n = x.shape[0] cdef float64_t d = 0. cdef float64_t tmp for i in range(n): if (not isnan(x[i])) and (not isnan(y[i])): tmp = x[i] - y[i] d += tmp * tmp return sqrt(d) cdef floating dist_euclidean(floating[:] x, floating[:] y) nogil: cdef size_t n = x.shape[0] cdef float64_t d = 0. cdef float64_t tmp for i in range(n): tmp = x[i] - y[i] d += tmp * tmp return sqrt(d) cdef floating norm_euclidean(floating[:] x) nogil: cdef size_t n = x.shape[0] cdef float64_t d = 0. for i in range(n): d += x[i] * x[i] return sqrt(d) cdef geomedian_axis_zero(floating[:, :] X, floating eps=1e-7, size_t maxiters=500): cdef size_t p = X.shape[0] cdef size_t n = X.shape[1] cdef floating[:] y = np.mean(X, axis=0) if p == 0: return y if floating is cnp.float32_t: dtype = np.float32 else: dtype = np.float64 cdef floating[:] D = np.empty(p, dtype=dtype) cdef floating[:] Dinv = np.empty(p, dtype=dtype) cdef floating[:] W = np.empty(p, dtype=dtype) cdef floating[:] T = np.empty(n, dtype=dtype) cdef floating[:] y1 = np.empty(n, dtype=dtype) cdef floating[:] R = np.empty(n, dtype=dtype) cdef floating dist, Dinvs, total, r, rinv, tmp, Di cdef size_t nzeros = p cdef size_t iteration with nogil: iteration = 0 while iteration < maxiters: for i in range(p): Di = dist_euclidean(X[i, :], y) if fabs(Di) > eps: Dinv[i] = 1. / Di else: Dinv[i] = 0. D[i] = Di Dinvs = sum(Dinv) for i in range(p): W[i] = Dinv[i] / Dinvs for j in range(n): total = 0. for i in range(p): if fabs(D[i]) > eps: total += W[i] * X[i, j] T[j] = total nzeros = p for i in range(p): if fabs(D[i]) > eps: nzeros -= 1 if nzeros == 0: y1 = T elif nzeros == p: break else: for j in range(n): R[j] = (T[j] - y[j]) * Dinvs r = norm_euclidean(R) if r > eps: rinv = nzeros/r else: rinv = 0. for j in range(n): y1[j] = max(0, 1-rinv)*T[j] + min(1, rinv)*y[j] dist = dist_euclidean(y, y1) if dist < eps: break y[:] = y1 iteration = iteration + 1 return y cdef geomedian_axis_one(floating[:, :] X, floating eps=1e-7, size_t maxiters=500): cdef size_t p = X.shape[0] cdef size_t n = X.shape[1] cdef floating[:] y = np.mean(X, axis=1) if n == 1: return y if floating is cnp.float32_t: dtype = np.float32 else: dtype = np.float64 cdef floating[:] D = np.empty(n, dtype=dtype) cdef floating[:] Dinv = np.empty(n, dtype=dtype) cdef floating[:] W = np.empty(n, dtype=dtype) cdef floating[:] T = np.empty(p, dtype=dtype) cdef floating[:] y1 = np.empty(p, dtype=dtype) cdef floating[:] R = np.empty(p, dtype=dtype) cdef floating dist, Dinvs, total, r, rinv, tmp, Di cdef size_t nzeros = n cdef size_t iteration with nogil: iteration = 0 while iteration < maxiters: for i in range(n): Di = dist_euclidean(X[:, i], y) D[i] = Di if fabs(Di) > eps: Dinv[i] = 1. / Di else: Dinv[i] = 0. Dinvs = sum(Dinv) for i in range(n): W[i] = Dinv[i] / Dinvs for j in range(p): total = 0. for i in range(n): if fabs(D[i]) > eps: total += W[i] * X[j, i] T[j] = total nzeros = n for i in range(n): if fabs(D[i]) > eps: nzeros -= 1 if nzeros == 0: y1 = T elif nzeros == n: break else: for j in range(p): R[j] = (T[j] - y[j]) * Dinvs r = norm_euclidean(R) if r > eps: rinv = nzeros/r else: rinv = 0. for j in range(p): y1[j] = max(0, 1-rinv)*T[j] + min(1, rinv)*y[j] dist = dist_euclidean(y, y1) if dist < eps: break y[:] = y1 iteration = iteration + 1 return y cdef nangeomedian_axis_zero(floating[:, :] X, floating eps=1e-7, size_t maxiters=500): cdef size_t p = X.shape[0] cdef size_t n = X.shape[1] cdef floating nan = float('NaN') cdef floating[:] y = np.nanmean(X, axis=0) if floating is cnp.float32_t: dtype = np.float32 else: dtype = np.float64 cdef floating[:] D = np.empty(p, dtype=dtype) cdef floating[:] Dinv = np.empty(p, dtype=dtype) cdef floating[:] W = np.empty(p, dtype=dtype) cdef floating[:] T = np.empty(n, dtype=dtype) cdef floating[:] y1 = np.empty(n, dtype=dtype) cdef floating[:] R = np.empty(n, dtype=dtype) cdef floating dist, Dinvs, total, r, rinv, tmp, Di cdef size_t nzeros = p cdef size_t iteration with nogil: iteration = 0 while iteration < maxiters: for i in range(p): Di = dist_euclidean(X[i, :], y) if fabs(Di) > 0.: Dinv[i] = 1. / Di else: Dinv[i] = nan D[i] = Di Dinvs = nansum(Dinv) for i in range(p): W[i] = Dinv[i] / Dinvs for j in range(n): total = 0. for i in range(p): tmp = W[i] * X[i, j] if not isnan(tmp): total += tmp T[j] = total nzeros = p for i in range(p): if isnan(D[i]): nzeros -= 1 elif fabs(D[i]) > 0.: nzeros -= 1 if nzeros == 0: y1 = T elif nzeros == p: break else: for j in range(n): R[j] = (T[j] - y[j]) * Dinvs r = norm_euclidean(R) if r > 0.: rinv = nzeros/r else: rinv = 0. for j in range(n): y1[j] = max(0, 1-rinv)*T[j] + min(1, rinv)*y[j] dist = dist_euclidean(y, y1) if dist < eps: break y[:] = y1 iteration = iteration + 1 return y1 cdef nangeomedian_axis_one(floating[:, :] X, floating eps=1e-7, size_t maxiters=500): cdef size_t p = X.shape[0] cdef size_t n = X.shape[1] cdef floating nan = float('NaN') cdef floating[:] y = np.nanmean(X, axis=1) if floating is cnp.float32_t: dtype = np.float32 else: dtype = np.float64 cdef floating[:] D = np.empty(n, dtype=dtype) cdef floating[:] Dinv = np.empty(n, dtype=dtype) cdef floating[:] W = np.empty(n, dtype=dtype) cdef floating[:] T = np.empty(p, dtype=dtype) cdef floating[:] y1 = np.empty(p, dtype=dtype) cdef floating[:] R = np.empty(p, dtype=dtype) cdef floating dist, Dinvs, total, r, rinv, tmp, Di cdef size_t nzeros = n cdef size_t iteration with nogil: iteration = 0 while iteration < maxiters: for i in range(n): Di = dist_euclidean(X[:, i], y) if fabs(Di) > 0.: Dinv[i] = 1. / Di else: Dinv[i] = nan D[i] = Di Dinvs = nansum(Dinv) for i in range(n): W[i] = Dinv[i] / Dinvs for j in range(p): total = 0. for i in range(n): tmp = W[i] * X[j, i] if not isnan(tmp): total += tmp T[j] = total nzeros = n for i in range(n): if isnan(D[i]): nzeros -= 1 elif fabs(D[i]) > 0.: nzeros -= 1 if nzeros == 0: y1 = T elif nzeros == n: break else: for j in range(p): R[j] = (T[j] - y[j]) * Dinvs r = norm_euclidean(R) if r > 0.: rinv = nzeros/r else: rinv = 0. for j in range(p): y1[j] = max(0, 1-rinv)*T[j] + min(1, rinv)*y[j] dist = dist_euclidean(y, y1) if dist < eps: break y[:] = y1 iteration = iteration + 1 return y1 cpdef geomedian(floating[:, :] X, size_t axis=1, floating eps=1e-8, size_t maxiters=1000): """Calculates a Geometric Median for an array `X` of shape (p,n). If the median is calculated across axis=1 (default) (the axis of size n) an array of size (p,1) is returned. """ if axis == 0: return geomedian_axis_zero(X, eps, maxiters) if axis == 1: return geomedian_axis_one(X, eps, maxiters) raise IndexError("axis {} out of bounds".format(axis)) cpdef nangeomedian(floating[:, :] X, size_t axis=1, floating eps=1e-7, size_t maxiters=500): """Calculates a Geometric Median for an array `X` of shape (p,n). If the median is calculated across axis=1 (default) (the axis of size n) an array of size (p,1) is returned. Missing values should be assigned as `np.nan`. """ if axis == 0: ngood = np.count_nonzero(~np.isnan(X).any(axis=1)) if ngood == 0: raise ValueError("All-NaN slice encountered") elif ngood < 3: return np.nanmedian(X, axis=axis) else: return nangeomedian_axis_zero(X, eps, maxiters) if axis == 1: ngood = np.count_nonzero(~np.isnan(X).any(axis=0)) if ngood == 0: raise ValueError("All-NaN slice encountered") elif ngood < 3: return np.nanmedian(X, axis=axis) else: return nangeomedian_axis_one(X, eps, maxiters) raise IndexError("axis {} out of bounds".format(axis)) hdmedians-0.14.2/hdmedians/medoid.py000066400000000000000000000050311401603016600172750ustar00rootroot00000000000000# Copyright (C) 2016-2017 Dale Roberts - All Rights Reserved """ Medoid. """ import numpy as np def medoid(a, axis=1, indexonly=False): """ Compute the medoid along the specified axis. Returns the medoid of the array elements. Parameters ---------- a : array_like Input array or object that can be converted to an array. axis : int Axis along which the medoid is computed. The default is to compute the median along the last axis of the array. indexonly : bool, optional If this is set to True, only the index of the medoid is returned. Returns ------- medoid : ndarray or int """ if axis == 1: diff = a.T[:, None, :] - a.T ssum = np.einsum('ijk,ijk->ij', diff, diff) idx = np.argmin(np.sum(np.sqrt(ssum), axis=1)) if indexonly: return idx else: return a[:, idx] if axis == 0: diff = a[:, None, :] - a ssum = np.einsum('ijk,ijk->ij', diff, diff) idx = np.argmin(np.sum(np.sqrt(ssum), axis=1)) if indexonly: return idx else: return a[idx, :] raise IndexError("axis {} out of bounds".format(axis)) def nanmedoid(a, axis=1, indexonly=False): """ Compute the medoid along the specified axis, omitting observations containing NaNs. Returns the medoid of the array elements. Parameters ---------- a : array_like Input array or object that can be converted to an array. axis : int Axis along which the medoid is computed. The default is to compute the median along the last axis of the array. indexonly : bool, optional If this is set to True, only the index of the medoid is returned. Returns ------- medoid : ndarray or int """ if axis == 1: diff = a.T[:, None, :] - a.T ssum = np.einsum('ijk,ijk->ij', diff, diff) dist = np.nansum(np.sqrt(ssum), axis=1) mask = np.isnan(a).any(axis=0) dist[mask] = np.nan idx = np.nanargmin(dist) if indexonly: return idx else: return a[:, idx] if axis == 0: diff = a[:, None, :] - a ssum = np.einsum('ijk,ijk->ij', diff, diff) dist = np.nansum(np.sqrt(ssum), axis=1) mask = np.isnan(a).any(axis=1) dist[mask] = np.nan idx = np.nanargmin(dist) if indexonly: return idx else: return a[idx, :] raise IndexError("axis {} out of bounds".format(axis)) hdmedians-0.14.2/hdmedians/tests/000077500000000000000000000000001401603016600166255ustar00rootroot00000000000000hdmedians-0.14.2/hdmedians/tests/.DS_Store000066400000000000000000000140041401603016600203070ustar00rootroot00000000000000Bud1cache_ __pycache__lg1ScompÖ __pycache__moDDdutcÔ¹qS __pycache__modDdutcÔ¹qS __pycache__ph1Scomp   @€ @€ @€ @ EDSDB `€ @€ @€ @hdmedians-0.14.2/hdmedians/tests/.pylintrc000077700000000000000000000000001401603016600225452../.pylintrcustar00rootroot00000000000000hdmedians-0.14.2/hdmedians/tests/__init__.py000066400000000000000000000000001401603016600207240ustar00rootroot00000000000000hdmedians-0.14.2/hdmedians/tests/test_geomedian.py000066400000000000000000000105751401603016600221760ustar00rootroot00000000000000""" Tests. """ import numpy as np import hdmedians as hd from numpy.testing import assert_equal, assert_array_almost_equal from nose.tools import assert_true, assert_raises # shape (6, 25) DATA1 = np.array([[693, 990, 1281, 2101, 3524, 2577], [606, 898, 1128, 1962, 2992, 2106], [509, 831, 932, 2287, 3113, 2188], [466, 796, 870, 2380, 2903, 1953], [527, 814, 888, 2456, 2835, 1841], [721, 966, 1227, 2249, 3577, 2693], [670, 926, 1213, 2218, 3574, 2719], [809, 1058, 1375, 2272, 3860, 2936], [864, 1115, 1454, 2299, 3630, 2843], [793, 1029, 1353, 2212, 3774, 3010], [849, 1143, 1592, 2483, 4121, 3138], [847, 1149, 1606, 2472, 4179, 3326], [841, 1146, 1609, 2487, 4200, 3375], [893, 1169, 1640, 2525, 4191, 3302], [833, 1099, 1556, 2478, 4190, 3367], [690, 978, 1296, 2603, 3844, 2945], [364, 706, 548, 3763, 2381, 1273], [666, 1084, 1527, 3130, 3665, 2435], [500, 749, 938, 2499, 3031, 2146], [558, 821, 1082, 2384, 3259, 2341], [756, 1058, 1456, 2287, 3306, 2501], [478, 590, 798, 1105, 1385, 990], [482, 710, 972, 1909, 2769, 1822], [248, 618, 378, 3899, 1921, 938], [308, 659, 522, 3281, 1987, 1100]]).T def test_geomedian_shape_noaxis(): a = np.random.normal(1, size=(6, 10)) m = hd.geomedian(a) assert_equal(m.shape, (6, )) def test_geomedian_shape_axis_zero(): a = np.random.normal(1, size=(6, 10)) m = hd.geomedian(a, axis=0) assert_equal(m.shape, (10, )) def test_geomedian_shape_axis_one(): a = np.random.normal(1, size=(6, 10)) m = hd.geomedian(a, axis=1) assert_equal(m.shape, (6, )) def test_geomedian_noaxis(): m = hd.geomedian(DATA1.astype(np.float32)) r = np.array([684.9332, 962.1752, 1247.556, 2340.647, 3473.594, 2584.103]) assert_array_almost_equal(m, r, decimal=3) def test_geomedian_axis_zero(): m = hd.geomedian(DATA1.astype(np.float32), axis=0) r = np.array([1374.029, 1206.014, 1107.71, 1045.085, 1059.543, 1362.539, 1341.17, 1496.717, 1542.327, 1475.737, 1667.315, 1689.629, 1694.82, 1718.672, 1655.421, 1444.762, 861.6774, 1573.617, 1099.888, 1211.265, 1483.749, 772.4884, 1042.317, 709.8128, 777.8592]) assert_array_almost_equal(m, r, decimal=3) def test_geomedian_axis_one(): m = hd.geomedian(DATA1.astype(np.float32), axis=1) r = np.array([684.9332, 962.1752, 1247.556, 2340.647, 3473.594, 2584.103]) assert_array_almost_equal(m, r, decimal=3) def test_geomedian_same_values(): data = np.ones((4, 2)) m = hd.geomedian(data, axis=1) print(np.sum(m)) r = np.median(data, axis=1) assert_array_almost_equal(m, r, decimal=3) def test_geomedian_1d(): data = np.ones((1, 3)) m = hd.geomedian(data, axis=1) r = np.median(data, axis=1) assert_array_almost_equal(m, r, decimal=3) def test_geomedian_one_obs(): data = np.array([[1.0, 2.0, 1.0]]) m = hd.geomedian(data, axis=0) r = np.array([1.0, 2.0, 1.0]) assert_array_almost_equal(m, r, decimal=3) def test_geomedian_two_obs(): data = np.array([[1.0, 2.0, 1.0], [2.0, 1.0, 1.0]]) m = hd.geomedian(data, axis=0) r = np.array([1.5, 1.5, 1.0]) assert_array_almost_equal(m, r, decimal=3) def test_nangeomedian_axis_zero_one_good(): data = np.array([[1.0, np.nan, 1.0], [2.0, 1.0, 1.0]]) m = hd.nangeomedian(data, axis=0) r = np.nanmedian(data, axis=0) assert_array_almost_equal(m, r, decimal=3) def test_nangeomedian_axis_one_two_good(): data = np.array([[1.0, np.nan, 1.0], [2.0, 1.0, 1.0]]) m = hd.nangeomedian(data, axis=1) r = np.nanmedian(data, axis=1) assert_array_almost_equal(m, r, decimal=3) def test_nangeomedian_axis_bad(): data = np.array([[1.0, np.nan, 1.0], [2.0, 1.0, 1.0]]) assert_raises(IndexError, hd.nangeomedian, data, axis=2) def test_nangeomedian_all_nan(): data = np.array([[np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan]]) assert_raises(ValueError, hd.nangeomedian, data) hdmedians-0.14.2/hdmedians/tests/test_medoid.py000066400000000000000000000125151401603016600215030ustar00rootroot00000000000000""" Tests. """ import numpy as np import hdmedians as hd from numpy.testing import assert_equal, assert_array_almost_equal from nose.tools import assert_true, assert_raises # shape (6, 25) DATA1 = np.array([[693, 990, 1281, 2101, 3524, 2577], [606, 898, 1128, 1962, 2992, 2106], [509, 831, 932, 2287, 3113, 2188], [466, 796, 870, 2380, 2903, 1953], [527, 814, 888, 2456, 2835, 1841], [721, 966, 1227, 2249, 3577, 2693], [670, 926, 1213, 2218, 3574, 2719], [809, 1058, 1375, 2272, 3860, 2936], [864, 1115, 1454, 2299, 3630, 2843], [793, 1029, 1353, 2212, 3774, 3010], [849, 1143, 1592, 2483, 4121, 3138], [847, 1149, 1606, 2472, 4179, 3326], [841, 1146, 1609, 2487, 4200, 3375], [893, 1169, 1640, 2525, 4191, 3302], [833, 1099, 1556, 2478, 4190, 3367], [690, 978, 1296, 2603, 3844, 2945], [364, 706, 548, 3763, 2381, 1273], [666, 1084, 1527, 3130, 3665, 2435], [500, 749, 938, 2499, 3031, 2146], [558, 821, 1082, 2384, 3259, 2341], [756, 1058, 1456, 2287, 3306, 2501], [478, 590, 798, 1105, 1385, 990], [482, 710, 972, 1909, 2769, 1822], [248, 618, 378, 3899, 1921, 938], [308, 659, 522, 3281, 1987, 1100]]).T def test_medoid_shape_noaxis(): a = np.random.normal(1, size=(6, 10)) m = hd.medoid(a) assert_equal(m.shape, (6, )) def test_medoid_shape_axis_zero(): a = np.random.normal(1, size=(6, 10)) m = hd.medoid(a, axis=0) assert_equal(m.shape, (10, )) def test_medoid_shape_axis_one(): a = np.random.normal(1, size=(6, 10)) m = hd.medoid(a, axis=1) assert_equal(m.shape, (6, )) def test_medoid_in_set_random(): a = np.random.normal(1, size=(6, 10)) s = [list(x) for x in a.T] m = hd.medoid(a) idx = s.index(list(m)) assert_true(idx > -1) def test_medoid_noaxis(): m = hd.medoid(DATA1) r = np.array([721, 966, 1227, 2249, 3577, 2693]) assert_equal(m, r) def test_medoid_axis_zero(): m = hd.medoid(DATA1, axis=0) r = np.array([1281, 1128, 932, 870, 888, 1227, 1213, 1375, 1454, 1353, 1592, 1606, 1609, 1640, 1556, 1296, 548, 1527, 938, 1082, 1456, 798, 972, 378, 522]) assert_equal(m, r) def test_medoid_axis_one(): m = hd.medoid(DATA1, axis=1) r = np.array([721, 966, 1227, 2249, 3577, 2693]) assert_equal(m, r) def test_medoid_axis_bad(): assert_raises(IndexError, hd.medoid, DATA1, axis=2) def test_medoid_noaxis_indexonly(): m = hd.medoid(DATA1, indexonly=True) assert_equal(m, 5) def test_medoid_axis_zero_indexonly(): m = hd.medoid(DATA1, axis=0, indexonly=True) assert_equal(m, 2) def test_medoid_axis_one_indexonly(): m = hd.medoid(DATA1, axis=1, indexonly=True) assert_equal(m, 5) def test_medoid_1d(): data = np.ones((1, 3)) m = hd.medoid(data, axis=1) r = np.median(data, axis=1) assert_array_almost_equal(m, r, decimal=3) def test_medoid_one_obs(): data = np.array([[1.0, 2.0, 1.0]]) m = hd.medoid(data, axis=0) r = np.array([1.0, 2.0, 1.0]) assert_array_almost_equal(m, r, decimal=3) def test_medoid_two_obs(): data = np.array([[1.0, 2.0, 1.0], [2.0, 1.0, 1.0]]) m = hd.medoid(data, axis=0) r = np.array([1.0, 2.0, 1.0]) assert_array_almost_equal(m, r, decimal=3) def test_nanmedoid_two_obs(): data = np.array([[1.0, np.nan, 1.0], [2.0, 1.0, 1.0]]) m = hd.nanmedoid(data, axis=0) r = np.array([2.0, 1.0, 1.0]) assert_array_almost_equal(m, r, decimal=3) def test_nanmedoid_all_nan(): data = np.array([[np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan]]) assert_raises(ValueError, hd.nanmedoid, data) def test_nanmedoid_axis_zero(): data = np.array([[1.0, np.nan, 1.0], [2.0, 1.0, 1.0]]) m = hd.nanmedoid(data, axis=0) r = np.array([2.0, 1.0, 1.0]) assert_array_almost_equal(m, r, decimal=3) def test_nanmedoid_axis_one(): data = np.array([[1.0, np.nan, 1.0], [2.0, 1.0, 1.0]]) m = hd.nanmedoid(data, axis=1) r = np.array([1.0, 2.0]) assert_array_almost_equal(m, r, decimal=3) def test_nanmedoid_axis_zero_indexonly(): data = np.array([[1.0, np.nan, 1.0], [2.0, 1.0, 1.0]]) m = hd.nanmedoid(data, axis=0, indexonly=True) assert_equal(m, 1) def test_nanmedoid_axis_one_indexonly(): data = np.array([[1.0, np.nan, 1.0], [2.0, 1.0, 1.0]]) m = hd.nanmedoid(data, axis=1, indexonly=True) assert_equal(m, 0) def test_nanmedoid_axis_bad(): assert_raises(IndexError, hd.nanmedoid, DATA1, axis=2) def test_nanmedoid_two_obs(): data = np.array([[1.0, np.nan, 1.0], [2.0, 1.0, 1.0]]) m = hd.nanmedoid(data, axis=0) r = np.array([2.0, 1.0, 1.0]) assert_array_almost_equal(m, r, decimal=3) def test_nanmedoid_all_nan(): data = np.array([[np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan]]) assert_raises(ValueError, hd.nanmedoid, data) hdmedians-0.14.2/pyproject.toml000066400000000000000000000001341401603016600164410ustar00rootroot00000000000000[build-system] requires = ["Cython>=0.23", "oldest-supported-numpy", "setuptools", "wheel"] hdmedians-0.14.2/setup.cfg000066400000000000000000000001001401603016600153370ustar00rootroot00000000000000[nosetests] verbosity=2 with-coverage=1 cover-package=hdmedians hdmedians-0.14.2/setup.py000066400000000000000000000015121401603016600152400ustar00rootroot00000000000000""" hdmedians: High-dimensional medians. """ import numpy as np from setuptools import setup, find_packages, Extension from setuptools import setup, Extension #from Cython.Distutils import build_ext extensions = [Extension('hdmedians.geomedian', ['hdmedians/geomedian.pyx'], include_dirs = [np.get_include()])] setup(name='hdmedians', packages=find_packages(), setup_requires=['nose>=1.0', 'Cython>=0.23'], install_requires=['numpy', 'Cython>=0.23'], version='0.14.1', description='High-dimensional medians', url='http://github.com/daleroberts/hdmedians', author='Dale Roberts', author_email='dale.o.roberts@gmail.com', license='Apache License, Version 2.0', # cmdclass = {'build_ext': build_ext}, ext_modules = extensions)