pax_global_header00006660000000000000000000000064142025513730014514gustar00rootroot0000000000000052 comment=7cc0ecbd43723418f43b8e73a46debbbc3940346 hnswlib-0.6.2/000077500000000000000000000000001420255137300131675ustar00rootroot00000000000000hnswlib-0.6.2/.github/000077500000000000000000000000001420255137300145275ustar00rootroot00000000000000hnswlib-0.6.2/.github/workflows/000077500000000000000000000000001420255137300165645ustar00rootroot00000000000000hnswlib-0.6.2/.github/workflows/build.yml000066400000000000000000000010721420255137300204060ustar00rootroot00000000000000name: HNSW CI on: [push, pull_request] jobs: test: runs-on: ${{matrix.os}} strategy: matrix: os: [ubuntu-latest, windows-latest] python-version: ['3.6', '3.7', '3.8', '3.9'] steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Build and install run: python -m pip install . - name: Test run: python -m unittest discover --start-directory python_bindings/tests --pattern "*_test*.py" hnswlib-0.6.2/.gitignore000066400000000000000000000001671420255137300151630ustar00rootroot00000000000000hnswlib.egg-info/ build/ dist/ tmp/ python_bindings/tests/__pycache__/ *.pyd hnswlib.cpython*.so var/ .idea/ .vscode/ hnswlib-0.6.2/ALGO_PARAMS.md000066400000000000000000000050271420255137300153020ustar00rootroot00000000000000# HNSW algorithm parameters ## Search parameters: * ```ef``` - the size of the dynamic list for the nearest neighbors (used during the search). Higher ```ef``` leads to more accurate but slower search. ```ef``` cannot be set lower than the number of queried nearest neighbors ```k```. The value ```ef``` of can be anything between ```k``` and the size of the dataset. * ```k``` number of nearest neighbors to be returned as the result. The ```knn_query``` function returns two numpy arrays, containing labels and distances to the k found nearest elements for the queries. 
Note that in case the algorithm is not able to find ```k``` neighbors for all of the queries (this can be due to problems with the graph or ```k```>size of the dataset), an exception is thrown. An example of tuning the parameters can be found in [TESTING_RECALL.md](TESTING_RECALL.md) ## Construction parameters: * ```M``` - the number of bi-directional links created for every new element during construction. Reasonable range for ```M``` is 2-100. Higher ```M``` work better on datasets with high intrinsic dimensionality and/or high recall, while low ```M``` work better for datasets with low intrinsic dimensionality and/or low recalls. The parameter also determines the algorithm's memory consumption, which is roughly ```M * 8-10``` bytes per stored element. As an example for ```dim```=4 random vectors optimal ```M``` for search is somewhere around 6, while for high dimensional datasets (word embeddings, good face descriptors), higher ```M``` are required (e.g. ```M```=48-64) for optimal performance at high recall. The range ```M```=12-48 is ok for most of the use cases. When ```M``` is changed one has to update the other parameters. Nonetheless, ef and ef_construction parameters can be roughly estimated by assuming that ```M```*```ef_{construction}``` is a constant. * ```ef_construction``` - the parameter has the same meaning as ```ef```, but controls the index_time/index_accuracy. Bigger ef_construction leads to longer construction, but better index quality. At some point, increasing ef_construction does not improve the quality of the index. One way to check if the selection of ef_construction was ok is to measure a recall for M nearest neighbor search when ```ef``` =```ef_construction```: if the recall is lower than 0.9, then there is room for improvement. * ```num_elements``` - defines the maximum number of elements in the index. The index can be extended by saving/loading (the load_index function has a parameter which defines the new maximum number of elements). 
hnswlib-0.6.2/CMakeLists.txt000066400000000000000000000021731420255137300157320ustar00rootroot00000000000000cmake_minimum_required (VERSION 2.6) project(hnsw_lib LANGUAGES CXX) add_library(hnswlib INTERFACE) target_include_directories(hnswlib INTERFACE .) if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) set(CMAKE_CXX_STANDARD 11) if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") SET( CMAKE_CXX_FLAGS "-Ofast -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -ftree-vectorize") elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") SET( CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -march=native -fpic -w -fopenmp -ftree-vectorize -ftree-vectorizer-verbose=0" ) elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") SET( CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -w -fopenmp -ftree-vectorize" ) endif() add_executable(test_updates examples/updates_test.cpp) target_link_libraries(test_updates hnswlib) add_executable(searchKnnCloserFirst_test examples/searchKnnCloserFirst_test.cpp) target_link_libraries(searchKnnCloserFirst_test hnswlib) add_executable(main main.cpp sift_1b.cpp) target_link_libraries(main hnswlib) endif() hnswlib-0.6.2/LICENSE000066400000000000000000000261351420255137300142030ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright {yyyy} {name of copyright owner} Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. hnswlib-0.6.2/MANIFEST.in000066400000000000000000000000441420255137300147230ustar00rootroot00000000000000include hnswlib/*.h include LICENSE hnswlib-0.6.2/Makefile000066400000000000000000000004461420255137300146330ustar00rootroot00000000000000pypi: dist twine upload dist/* dist: -rm dist/* pip install build python3 -m build --sdist test: python3 -m unittest discover --start-directory python_bindings/tests --pattern "*_test*.py" clean: rm -rf *.egg-info build dist tmp var tests/__pycache__ hnswlib.cpython*.so .PHONY: dist hnswlib-0.6.2/README.md000066400000000000000000000324441420255137300144550ustar00rootroot00000000000000# Hnswlib - fast approximate nearest neighbor search Header-only C++ HNSW implementation with python bindings. **NEWS:** **version 0.6.2** * Fixed a bug in saving of large pickles. The pickles with > 4GB could have been corrupted. Thanks Kai Wohlfahrt for reporting. * Thanks to ([@GuyAv46](https://github.com/GuyAv46)) hnswlib inner product now is more consitent accross architectures (SSE, AVX, etc). * **version 0.6.1** * Thanks to ([@tony-kuo](https://github.com/tony-kuo)) hnswlib AVX512 and AVX builds are not backwards-compatible with older SSE and non-AVX512 architectures. * Thanks to ([@psobot](https://github.com/psobot)) there is now a sencible message instead of segfault when passing a scalar to get_items. 
* Thanks to ([@urigoren](https://github.com/urigoren)) hnswlib has a lazy index creation python wrapper. **version 0.6.0** * Thanks to ([@dyashuni](https://github.com/dyashuni)) hnswlib now uses github actions for CI, there is a search speedup in some scenarios with deletions. `unmark_deleted(label)` is now also a part of the python interface (note now it throws an exception for double deletions). * Thanks to ([@slice4e](https://github.com/slice4e)) we now support AVX512; thanks to ([@LTLA](https://github.com/LTLA)) the cmake interface for the lib is now updated. * Thanks to ([@alonre24](https://github.com/alonre24)) we now have python bindings for brute-force (and examples for recall tuning: [TESTING_RECALL.md](TESTING_RECALL.md)). * Thanks to ([@dorosy-yeong](https://github.com/dorosy-yeong)) a bug in the handling of large quantities of deleted elements and large K is fixed. ### Highlights: 1) Lightweight, header-only, no dependencies other than C++ 11 2) Interfaces for C++, Java, Python and R (https://github.com/jlmelville/rcpphnsw). 3) Has full support for incremental index construction. Has support for element deletions (by marking them in index). Index is picklable. 4) Can work with custom user defined distances (C++). 5) Significantly smaller memory footprint and faster build time compared to current nmslib's implementation. Description of the algorithm parameters can be found in [ALGO_PARAMS.md](ALGO_PARAMS.md). ### Python bindings #### Supported distances: | Distance | parameter | Equation | | ------------- |:---------------:| -----------------------:| |Squared L2 |'l2' | d = sum((Ai-Bi)^2) | |Inner product |'ip' | d = 1.0 - sum(Ai\*Bi) | |Cosine similarity |'cosine' | d = 1.0 - sum(Ai\*Bi) / sqrt(sum(Ai\*Ai) * sum(Bi\*Bi))| Note that inner product is not an actual metric. An element can be closer to some other element than to itself. That allows some speedup if you remove all elements that are not the closest to themselves from the index. 
For other spaces use the nmslib library https://github.com/nmslib/nmslib. #### Short API description * `hnswlib.Index(space, dim)` creates a non-initialized HNSW index in space `space` with integer dimension `dim`. `hnswlib.Index` methods: * `init_index(max_elements, M = 16, ef_construction = 200, random_seed = 100)` initializes the index with no elements. * `max_elements` defines the maximum number of elements that can be stored in the structure (can be increased/shrunk). * `ef_construction` defines a construction time/accuracy trade-off (see [ALGO_PARAMS.md](ALGO_PARAMS.md)). * `M` defines the maximum number of outgoing connections in the graph ([ALGO_PARAMS.md](ALGO_PARAMS.md)). * `add_items(data, ids, num_threads = -1)` - inserts the `data` (numpy array of vectors, shape:`N*dim`) into the structure. * `num_threads` sets the number of cpu threads to use (-1 means use default). * `ids` is an optional N-size numpy array of integer labels for all elements in `data`. - If the index already has elements with the same labels, their features will be updated. Note that the update procedure is slower than insertion of a new element, but more memory- and query-efficient. * Thread-safe with other `add_items` calls, but not with `knn_query`. * `mark_deleted(label)` - marks the element as deleted, so it will be omitted from search results. Throws an exception if it is already deleted. * `unmark_deleted(label)` - unmarks the element as deleted, so it will not be omitted from search results. * `resize_index(new_size)` - changes the maximum capacity of the index. Not thread safe with `add_items` and `knn_query`. * `set_ef(ef)` - sets the query time accuracy/speed trade-off, defined by the `ef` parameter ([ALGO_PARAMS.md](ALGO_PARAMS.md)). Note that the parameter is currently not saved along with the index, so you need to set it manually after loading. 
* `knn_query(data, k = 1, num_threads = -1)` makes a batch query for the `k` closest elements for each element of the `data` (shape:`N*dim`). Returns two numpy arrays, labels and distances, each of shape `N*k`. * `num_threads` sets the number of cpu threads to use (-1 means use default). * Thread-safe with other `knn_query` calls, but not with `add_items`. * `load_index(path_to_index, max_elements = 0)` loads the index from persistence to the uninitialized index. * `max_elements` (optional) resets the maximum number of elements in the structure. * `save_index(path_to_index)` saves the index to persistence. * `set_num_threads(num_threads)` sets the default number of cpu threads used during data insertion/querying. * `get_items(ids)` - returns a numpy array (shape:`N*dim`) of vectors that have integer identifiers specified in `ids` numpy vector (shape:`N`). Note that for cosine similarity it currently returns **normalized** vectors. * `get_ids_list()` - returns a list of all elements' ids. * `get_max_elements()` - returns the current capacity of the index * `get_current_count()` - returns the current number of elements stored in the index Read-only properties of `hnswlib.Index` class: * `space` - name of the space (can be one of "l2", "ip", or "cosine"). * `dim` - dimensionality of the space. * `M` - parameter that defines the maximum number of outgoing connections in the graph. * `ef_construction` - parameter that controls speed/accuracy trade-off during the index construction. * `max_elements` - current capacity of the index. Equivalent to `p.get_max_elements()`. * `element_count` - number of items in the index. Equivalent to `p.get_current_count()`. Properties of `hnswlib.Index` that support reading and writing: * `ef` - parameter controlling query time/accuracy trade-off. * `num_threads` - default number of threads to use in `add_items` or `knn_query`. Note that calling `p.set_num_threads(3)` is equivalent to `p.num_threads=3`. 
#### Python bindings examples ```python import hnswlib import numpy as np import pickle dim = 128 num_elements = 10000 # Generating sample data data = np.float32(np.random.random((num_elements, dim))) ids = np.arange(num_elements) # Declaring index p = hnswlib.Index(space = 'l2', dim = dim) # possible options are l2, cosine or ip # Initializing index - the maximum number of elements should be known beforehand p.init_index(max_elements = num_elements, ef_construction = 200, M = 16) # Element insertion (can be called several times): p.add_items(data, ids) # Controlling the recall by setting ef: p.set_ef(50) # ef should always be > k # Query dataset, k - number of closest elements (returns 2 numpy arrays) labels, distances = p.knn_query(data, k = 1) # Index objects support pickling # WARNING: serialization via pickle.dumps(p) or p.__getstate__() is NOT thread-safe with p.add_items method! # Note: ef parameter is included in serialization; random number generator is initialized with random_seed on Index load p_copy = pickle.loads(pickle.dumps(p)) # creates a copy of index p using pickle round-trip ### Index parameters are exposed as class properties: print(f"Parameters passed to constructor: space={p_copy.space}, dim={p_copy.dim}") print(f"Index construction: M={p_copy.M}, ef_construction={p_copy.ef_construction}") print(f"Index size is {p_copy.element_count} and index capacity is {p_copy.max_elements}") print(f"Search speed/quality trade-off parameter: ef={p_copy.ef}") ``` An example with updates after serialization/deserialization: ```python import hnswlib import numpy as np dim = 16 num_elements = 10000 # Generating sample data data = np.float32(np.random.random((num_elements, dim))) # We split the data in two batches: data1 = data[:num_elements // 2] data2 = data[num_elements // 2:] # Declaring index p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip # Initializing index # max_elements - the maximum number of elements (capacity). 
Will throw an exception if exceeded # during insertion of an element. # The capacity can be increased by saving/loading the index, see below. # # ef_construction - controls index search speed/build speed tradeoff # # M - is tightly connected with internal dimensionality of the data. Strongly affects memory consumption (~M) # Higher M leads to higher accuracy/run_time at fixed ef/efConstruction p.init_index(max_elements=num_elements//2, ef_construction=100, M=16) # Controlling the recall by setting ef: # higher ef leads to better accuracy, but slower search p.set_ef(10) # Set number of threads used during batch search/construction # By default using all available cores p.set_num_threads(4) print("Adding first batch of %d elements" % (len(data1))) p.add_items(data1) # Query the elements for themselves and measure recall: labels, distances = p.knn_query(data1, k=1) print("Recall for the first batch:", np.mean(labels.reshape(-1) == np.arange(len(data1))), "\n") # Serializing and deleting the index: index_path='first_half.bin' print("Saving index to '%s'" % index_path) p.save_index("first_half.bin") del p # Re-initializing, loading the index p = hnswlib.Index(space='l2', dim=dim) # the space can be changed - keeps the data, alters the distance function. print("\nLoading index from 'first_half.bin'\n") # Increase the total capacity (max_elements), so that it will handle the new data p.load_index("first_half.bin", max_elements = num_elements) print("Adding the second batch of %d elements" % (len(data2))) p.add_items(data2) # Query the elements for themselves and measure recall: labels, distances = p.knn_query(data, k=1) print("Recall for two batches:", np.mean(labels.reshape(-1) == np.arange(len(data))), "\n") ``` ### Bindings installation You can install from sources: ```bash apt-get install -y python-setuptools python-pip git clone https://github.com/nmslib/hnswlib.git cd hnswlib pip install . 
``` or you can install via pip: `pip install hnswlib` ### For developers Contributions are highly welcome! Please make pull requests against the `develop` branch. When making changes please run tests (and please add a test to `python_bindings/tests` in case there is new functionality): ```bash python -m unittest discover --start-directory python_bindings/tests --pattern "*_test*.py" ``` ### Other implementations * Non-metric space library (nmslib) - main library (python, C++), supports exotic distances: https://github.com/nmslib/nmslib * Faiss library by facebook, uses own HNSW implementation for coarse quantization (python, C++): https://github.com/facebookresearch/faiss * Code for the paper ["Revisiting the Inverted Indices for Billion-Scale Approximate Nearest Neighbors"](https://arxiv.org/abs/1802.02422) (current state-of-the-art in compressed indexes, C++): https://github.com/dbaranchuk/ivf-hnsw * TOROS N2 (python, C++): https://github.com/kakao/n2 * Online HNSW (C++): https://github.com/andrusha97/online-hnsw * Go implementation: https://github.com/Bithack/go-hnsw * Python implementation (as a part of the clustering code by Matteo Dell'Amico): https://github.com/matteodellamico/flexible-clustering * Java implementation: https://github.com/jelmerk/hnswlib * Java bindings using Java Native Access: https://github.com/stepstone-tech/hnswlib-jna * .Net implementation: https://github.com/microsoft/HNSW.Net * CUDA implementation: https://github.com/js1010/cuhnsw ### 200M SIFT test reproduction To download and extract the bigann dataset (from root directory): ```bash python3 download_bigann.py ``` To compile: ```bash mkdir build cd build cmake .. make all ``` To run the test on 200M SIFT subset: ```bash ./main ``` The size of the BigANN subset (in millions) is controlled by the variable **subset_size_millions** hardcoded in **sift_1b.cpp**. 
### Updates test To generate testing data (from root directory): ```bash cd examples python update_gen_data.py ``` To compile (from root directory): ```bash mkdir build cd build cmake .. make ``` To run test **without** updates (from `build` directory) ```bash ./test_updates ``` To run test **with** updates (from `build` directory) ```bash ./test_updates update ``` ### HNSW example demos - Visual search engine for 1M amazon products (MXNet + HNSW): [website](https://thomasdelteil.github.io/VisualSearch_MXNet/), [code](https://github.com/ThomasDelteil/VisualSearch_MXNet), demo by [@ThomasDelteil](https://github.com/ThomasDelteil) ### References @article{malkov2018efficient, title={Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs}, author={Malkov, Yu A and Yashunin, Dmitry A}, journal={IEEE transactions on pattern analysis and machine intelligence}, volume={42}, number={4}, pages={824--836}, year={2018}, publisher={IEEE} } hnswlib-0.6.2/TESTING_RECALL.md000066400000000000000000000067401420255137300156570ustar00rootroot00000000000000# Testing recall Selecting HNSW parameters for a specific use case highly impacts the search quality. One way to test the quality of the constructed index is to compare the HNSW search results to the actual results (i.e., the actual `k` nearest neighbors). For that cause, the API enables creating a simple "brute-force" index in which vectors are stored as is, and searching for the `k` nearest neighbors to a query vector requires going over the entire index. Comparing between HNSW and brute-force results may help with finding the desired HNSW parameters for achieving a satisfying recall, based on the index size and data dimension. ### Brute force index API `hnswlib.BFIndex(space, dim)` creates a non-initialized index in space `space` with integer dimension `dim`. `hnswlib.BFIndex` methods: `init_index(max_elements)` initializes the index with no elements. 
max_elements defines the maximum number of elements that can be stored in the structure. `add_items(data, ids)` inserts the data (numpy array of vectors, shape:`N*dim`) into the structure. `ids` is an optional N-size numpy array of integer labels for all elements in data. `delete_vector(label)` deletes the element associated with the given `label` so it will be omitted from search results. `knn_query(data, k = 1)` makes a batch query for the `k` closest elements for each element of the `data` (shape:`N*dim`). Returns two numpy arrays, labels and distances, each of shape `N*k`. `load_index(path_to_index, max_elements = 0)` loads the index from persistence to the uninitialized index. `save_index(path_to_index)` saves the index to persistence. ### measuring recall example ``` import hnswlib import numpy as np dim = 32 num_elements = 100000 k = 10 nun_queries = 10 # Generating sample data data = np.float32(np.random.random((num_elements, dim))) # Declaring index hnsw_index = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip bf_index = hnswlib.BFIndex(space='l2', dim=dim) # Initing both hnsw and brute force indices # max_elements - the maximum number of elements (capacity). Will throw an exception if exceeded # during insertion of an element. # The capacity can be increased by saving/loading the index, see below. # # hnsw construction params: # ef_construction - controls index search speed/build speed tradeoff # # M - is tightly connected with internal dimensionality of the data. 
Strongly affects the memory consumption (~M) # Higher M leads to higher accuracy/run_time at fixed ef/efConstruction hnsw_index.init_index(max_elements=num_elements, ef_construction=200, M=16) bf_index.init_index(max_elements=num_elements) # Controlling the recall for hnsw by setting ef: # higher ef leads to better accuracy, but slower search hnsw_index.set_ef(200) # Set number of threads used during batch search/construction in hnsw # By default using all available cores hnsw_index.set_num_threads(1) print("Adding batch of %d elements" % (len(data))) hnsw_index.add_items(data) bf_index.add_items(data) print("Indices built") # Generating query data query_data = np.float32(np.random.random((nun_queries, dim))) # Query the elements and measure recall: labels_hnsw, distances_hnsw = hnsw_index.knn_query(query_data, k) labels_bf, distances_bf = bf_index.knn_query(query_data, k) # Measure recall correct = 0 for i in range(nun_queries): for label in labels_hnsw[i]: for correct_label in labels_bf[i]: if label == correct_label: correct += 1 break print("recall is :", float(correct)/(k*nun_queries)) ``` hnswlib-0.6.2/download_bigann.py000066400000000000000000000020001420255137300166560ustar00rootroot00000000000000import os.path import os links = ['ftp://ftp.irisa.fr/local/texmex/corpus/bigann_query.bvecs.gz', 'ftp://ftp.irisa.fr/local/texmex/corpus/bigann_gnd.tar.gz', 'ftp://ftp.irisa.fr/local/texmex/corpus/bigann_base.bvecs.gz'] os.makedirs('downloads', exist_ok=True) os.makedirs('bigann', exist_ok=True) for link in links: name = link.rsplit('/', 1)[-1] filename = os.path.join('downloads', name) if not os.path.isfile(filename): print('Downloading: ' + filename) try: os.system('wget --output-document=' + filename + ' ' + link) except Exception as inst: print(inst) print(' Encountered unknown error. 
Continuing.') else: print('Already downloaded: ' + filename) if filename.endswith('.tar.gz'): command = 'tar -zxf ' + filename + ' --directory bigann' else: command = 'cat ' + filename + ' | gzip -dc > bigann/' + name.replace(".gz", "") print("Unpacking file:", command) os.system(command) hnswlib-0.6.2/examples/000077500000000000000000000000001420255137300150055ustar00rootroot00000000000000hnswlib-0.6.2/examples/example.py000066400000000000000000000041651420255137300170200ustar00rootroot00000000000000import hnswlib import numpy as np dim = 16 num_elements = 10000 # Generating sample data data = np.float32(np.random.random((num_elements, dim))) # We split the data in two batches: data1 = data[:num_elements // 2] data2 = data[num_elements // 2:] # Declaring index p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip # Initing index # max_elements - the maximum number of elements (capacity). Will throw an exception if exceeded # during insertion of an element. # The capacity can be increased by saving/loading the index, see below. # # ef_construction - controls index search speed/build speed tradeoff # # M - is tightly connected with internal dimensionality of the data. 
Strongly affects the memory consumption (~M) # Higher M leads to higher accuracy/run_time at fixed ef/efConstruction p.init_index(max_elements=num_elements//2, ef_construction=100, M=16) # Controlling the recall by setting ef: # higher ef leads to better accuracy, but slower search p.set_ef(10) # Set number of threads used during batch search/construction # By default using all available cores p.set_num_threads(4) print("Adding first batch of %d elements" % (len(data1))) p.add_items(data1) # Query the elements for themselves and measure recall: labels, distances = p.knn_query(data1, k=1) print("Recall for the first batch:", np.mean(labels.reshape(-1) == np.arange(len(data1))), "\n") # Serializing and deleting the index: index_path='first_half.bin' print("Saving index to '%s'" % index_path) p.save_index(index_path) del p # Reiniting, loading the index p = hnswlib.Index(space='l2', dim=dim) # the space can be changed - keeps the data, alters the distance function. print("\nLoading index from 'first_half.bin'\n") # Increase the total capacity (max_elements), so that it will handle the new data p.load_index("first_half.bin", max_elements = num_elements) print("Adding the second batch of %d elements" % (len(data2))) p.add_items(data2) # Query the elements for themselves and measure recall: labels, distances = p.knn_query(data, k=1) print("Recall for two batches:", np.mean(labels.reshape(-1) == np.arange(len(data))), "\n") hnswlib-0.6.2/examples/example_old.py000066400000000000000000000034041420255137300176510ustar00rootroot00000000000000import hnswlib import numpy as np dim = 16 num_elements = 10000 # Generating sample data data = np.float32(np.random.random((num_elements, dim))) # Declaring index p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip # Initing index # max_elements - the maximum number of elements, should be known beforehand # (probably will be made optional in the future) # # ef_construction - controls index search speed/build 
speed tradeoff # M - is tightly connected with internal dimensionality of the data # stronlgy affects the memory consumption p.init_index(max_elements=num_elements, ef_construction=100, M=16) # Controlling the recall by setting ef: # higher ef leads to better accuracy, but slower search p.set_ef(10) p.set_num_threads(4) # by default using all available cores # We split the data in two batches: data1 = data[:num_elements // 2] data2 = data[num_elements // 2:] print("Adding first batch of %d elements" % (len(data1))) p.add_items(data1) # Query the elements for themselves and measure recall: labels, distances = p.knn_query(data1, k=1) print("Recall for the first batch:", np.mean(labels.reshape(-1) == np.arange(len(data1))), "\n") # Serializing and deleting the index: index_path='first_half.bin' print("Saving index to '%s'" % index_path) p.save_index("first_half.bin") del p # Reiniting, loading the index p = hnswlib.Index(space='l2', dim=dim) # you can change the sa print("\nLoading index from 'first_half.bin'\n") p.load_index("first_half.bin") print("Adding the second batch of %d elements" % (len(data2))) p.add_items(data2) # Query the elements for themselves and measure recall: labels, distances = p.knn_query(data, k=1) print("Recall for two batches:", np.mean(labels.reshape(-1) == np.arange(len(data))), "\n") hnswlib-0.6.2/examples/git_tester.py000066400000000000000000000023011420255137300175240ustar00rootroot00000000000000from pydriller import Repository import os import datetime os.system("cp examples/speedtest.py examples/speedtest2.py") # the file has to be outside of git for idx, commit in enumerate(Repository('.', from_tag="v0.6.0").traverse_commits()): name=commit.msg.replace('\n', ' ').replace('\r', ' ') print(idx, commit.hash, name) for commit in Repository('.', from_tag="v0.6.0").traverse_commits(): name=commit.msg.replace('\n', ' ').replace('\r', ' ') print(commit.hash, name) os.system(f"git checkout {commit.hash}; rm -rf build; ") 
print("\n\n--------------------\n\n") ret=os.system("python -m pip install .") print(ret) if ret != 0: print ("build failed!!!!") print ("build failed!!!!") print ("build failed!!!!") print ("build failed!!!!") continue os.system(f'python examples/speedtest2.py -n "{name}" -d 4 -t 1') os.system(f'python examples/speedtest2.py -n "{name}" -d 64 -t 1') os.system(f'python examples/speedtest2.py -n "{name}" -d 128 -t 1') os.system(f'python examples/speedtest2.py -n "{name}" -d 4 -t 24') os.system(f'python examples/speedtest2.py -n "{name}" -d 128 -t 24') hnswlib-0.6.2/examples/pyw_hnswlib.py000066400000000000000000000035111420255137300177240ustar00rootroot00000000000000import hnswlib import numpy as np import threading import pickle class Index(): def __init__(self, space, dim): self.index = hnswlib.Index(space, dim) self.lock = threading.Lock() self.dict_labels = {} self.cur_ind = 0 def init_index(self, max_elements, ef_construction=200, M=16): self.index.init_index(max_elements=max_elements, ef_construction=ef_construction, M=M) def add_items(self, data, ids=None): if ids is not None: assert len(data) == len(ids) num_added = len(data) with self.lock: start = self.cur_ind self.cur_ind += num_added int_labels = [] if ids is not None: for dl in ids: int_labels.append(start) self.dict_labels[start] = dl start += 1 else: for _ in range(len(data)): int_labels.append(start) self.dict_labels[start] = start start += 1 self.index.add_items(data=data, ids=np.asarray(int_labels)) def set_ef(self, ef): self.index.set_ef(ef) def load_index(self, path): self.index.load_index(path) with open(path + ".pkl", "rb") as f: self.cur_ind, self.dict_labels = pickle.load(f) def save_index(self, path): self.index.save_index(path) with open(path + ".pkl", "wb") as f: pickle.dump((self.cur_ind, self.dict_labels), f) def set_num_threads(self, num_threads): self.index.set_num_threads(num_threads) def knn_query(self, data, k=1): labels_int, distances = self.index.knn_query(data=data, k=k) labels = 
[] for li in labels_int: labels.append( [self.dict_labels[l] for l in li] ) return labels, distances hnswlib-0.6.2/examples/searchKnnCloserFirst_test.cpp000066400000000000000000000041411420255137300226440ustar00rootroot00000000000000// This is a test file for testing the interface // >>> virtual std::vector> // >>> searchKnnCloserFirst(const void* query_data, size_t k) const; // of class AlgorithmInterface #include "../hnswlib/hnswlib.h" #include #include #include namespace { using idx_t = hnswlib::labeltype; void test() { int d = 4; idx_t n = 100; idx_t nq = 10; size_t k = 10; std::vector data(n * d); std::vector query(nq * d); std::mt19937 rng; rng.seed(47); std::uniform_real_distribution<> distrib; for (idx_t i = 0; i < n * d; ++i) { data[i] = distrib(rng); } for (idx_t i = 0; i < nq * d; ++i) { query[i] = distrib(rng); } hnswlib::L2Space space(d); hnswlib::AlgorithmInterface* alg_brute = new hnswlib::BruteforceSearch(&space, 2 * n); hnswlib::AlgorithmInterface* alg_hnsw = new hnswlib::HierarchicalNSW(&space, 2 * n); for (size_t i = 0; i < n; ++i) { alg_brute->addPoint(data.data() + d * i, i); alg_hnsw->addPoint(data.data() + d * i, i); } // test searchKnnCloserFirst of BruteforceSearch for (size_t j = 0; j < nq; ++j) { const void* p = query.data() + j * d; auto gd = alg_brute->searchKnn(p, k); auto res = alg_brute->searchKnnCloserFirst(p, k); assert(gd.size() == res.size()); size_t t = gd.size(); while (!gd.empty()) { assert(gd.top() == res[--t]); gd.pop(); } } for (size_t j = 0; j < nq; ++j) { const void* p = query.data() + j * d; auto gd = alg_hnsw->searchKnn(p, k); auto res = alg_hnsw->searchKnnCloserFirst(p, k); assert(gd.size() == res.size()); size_t t = gd.size(); while (!gd.empty()) { assert(gd.top() == res[--t]); gd.pop(); } } delete alg_brute; delete alg_hnsw; } } // namespace int main() { std::cout << "Testing ..." 
<< std::endl; test(); std::cout << "Test ok" << std::endl; return 0; } hnswlib-0.6.2/examples/speedtest.py000066400000000000000000000030771420255137300173660ustar00rootroot00000000000000import hnswlib import numpy as np import os.path import time import argparse # Use nargs to specify how many arguments an option should take. ap = argparse.ArgumentParser() ap.add_argument('-d') ap.add_argument('-n') ap.add_argument('-t') args = ap.parse_args() dim = int(args.d) name = args.n threads=int(args.t) num_elements = 1000000 * 4//dim # Generating sample data np.random.seed(1) data = np.float32(np.random.random((num_elements, dim))) index_path=f'speed_index{dim}.bin' # Declaring index p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip if not os.path.isfile(index_path) : p.init_index(max_elements=num_elements, ef_construction=100, M=16) # Controlling the recall by setting ef: # higher ef leads to better accuracy, but slower search p.set_ef(10) # Set number of threads used during batch search/construction # By default using all available cores p.set_num_threads(12) p.add_items(data) # Serializing and deleting the index: print("Saving index to '%s'" % index_path) p.save_index(index_path) p.set_num_threads(threads) times=[] time.sleep(10) p.set_ef(100) for _ in range(3): p.load_index(index_path) for _ in range(10): t0=time.time() labels, distances = p.knn_query(data, k=1) tt=time.time()-t0 times.append(tt) print(f"{tt} seconds") str_out=f"mean time:{np.mean(times)}, median time:{np.median(times)}, std time {np.std(times)} {name}" print(str_out) with open (f"log_{dim}_t{threads}.txt","a") as f: f.write(str_out+"\n") f.flush() hnswlib-0.6.2/examples/update_gen_data.py000066400000000000000000000020171420255137300204630ustar00rootroot00000000000000import numpy as np import os def normalized(a, axis=-1, order=2): l2 = np.atleast_1d(np.linalg.norm(a, order, axis)) l2[l2==0] = 1 return a / np.expand_dims(l2, axis) N=100000 dummy_data_multiplier=3 N_queries 
= 1000 d=8 K=5 np.random.seed(1) print("Generating data...") batches_dummy= [ normalized(np.float32(np.random.random( (N,d)))) for _ in range(dummy_data_multiplier)] batch_final = normalized (np.float32(np.random.random( (N,d)))) queries = normalized(np.float32(np.random.random( (N_queries,d)))) print("Computing distances...") dist=np.dot(queries,batch_final.T) topk=np.argsort(-dist)[:,:K] print("Saving...") try: os.mkdir("data") except OSError as e: pass for idx, batch_dummy in enumerate(batches_dummy): batch_dummy.tofile('data/batch_dummy_%02d.bin' % idx) batch_final.tofile('data/batch_final.bin') queries.tofile('data/queries.bin') np.int32(topk).tofile('data/gt.bin') with open("data/config.txt", "w") as file: file.write("%d %d %d %d %d" %(N, dummy_data_multiplier, N_queries, d, K))hnswlib-0.6.2/examples/updates_test.cpp000066400000000000000000000211711420255137300202170ustar00rootroot00000000000000#include "../hnswlib/hnswlib.h" #include class StopW { std::chrono::steady_clock::time_point time_begin; public: StopW() { time_begin = std::chrono::steady_clock::now(); } float getElapsedTimeMicro() { std::chrono::steady_clock::time_point time_end = std::chrono::steady_clock::now(); return (std::chrono::duration_cast(time_end - time_begin).count()); } void reset() { time_begin = std::chrono::steady_clock::now(); } }; /* * replacement for the openmp '#pragma omp parallel for' directive * only handles a subset of functionality (no reductions etc) * Process ids from start (inclusive) to end (EXCLUSIVE) * * The method is borrowed from nmslib */ template inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn) { if (numThreads <= 0) { numThreads = std::thread::hardware_concurrency(); } if (numThreads == 1) { for (size_t id = start; id < end; id++) { fn(id, 0); } } else { std::vector threads; std::atomic current(start); // keep track of exceptions in threads // https://stackoverflow.com/a/32428427/1713196 std::exception_ptr lastException = nullptr; 
std::mutex lastExceptMutex; for (size_t threadId = 0; threadId < numThreads; ++threadId) { threads.push_back(std::thread([&, threadId] { while (true) { size_t id = current.fetch_add(1); if ((id >= end)) { break; } try { fn(id, threadId); } catch (...) { std::unique_lock lastExcepLock(lastExceptMutex); lastException = std::current_exception(); /* * This will work even when current is the largest value that * size_t can fit, because fetch_add returns the previous value * before the increment (what will result in overflow * and produce 0 instead of current + 1). */ current = end; break; } } })); } for (auto &thread : threads) { thread.join(); } if (lastException) { std::rethrow_exception(lastException); } } } template std::vector load_batch(std::string path, int size) { std::cout << "Loading " << path << "..."; // float or int32 (python) assert(sizeof(datatype) == 4); std::ifstream file; file.open(path); if (!file.is_open()) { std::cout << "Cannot open " << path << "\n"; exit(1); } std::vector batch(size); file.read((char *)batch.data(), size * sizeof(float)); std::cout << " DONE\n"; return batch; } template static float test_approx(std::vector &queries, size_t qsize, hnswlib::HierarchicalNSW &appr_alg, size_t vecdim, std::vector> &answers, size_t K) { size_t correct = 0; size_t total = 0; //uncomment to test in parallel mode: for (int i = 0; i < qsize; i++) { std::priority_queue> result = appr_alg.searchKnn((char *)(queries.data() + vecdim * i), K); total += K; while (result.size()) { if (answers[i].find(result.top().second) != answers[i].end()) { correct++; } else { } result.pop(); } } return 1.0f * correct / total; } static void test_vs_recall(std::vector &queries, size_t qsize, hnswlib::HierarchicalNSW &appr_alg, size_t vecdim, std::vector> &answers, size_t k) { std::vector efs = {1}; for (int i = k; i < 30; i++) { efs.push_back(i); } for (int i = 30; i < 400; i+=10) { efs.push_back(i); } for (int i = 1000; i < 100000; i += 5000) { efs.push_back(i); } std::cout << 
"ef\trecall\ttime\thops\tdistcomp\n"; for (size_t ef : efs) { appr_alg.setEf(ef); appr_alg.metric_hops=0; appr_alg.metric_distance_computations=0; StopW stopw = StopW(); float recall = test_approx(queries, qsize, appr_alg, vecdim, answers, k); float time_us_per_query = stopw.getElapsedTimeMicro() / qsize; float distance_comp_per_query = appr_alg.metric_distance_computations / (1.0f * qsize); float hops_per_query = appr_alg.metric_hops / (1.0f * qsize); std::cout << ef << "\t" << recall << "\t" << time_us_per_query << "us \t"< 0.99) { std::cout << "Recall is over 0.99! "<2){ std::cout<<"Usage ./test_updates [update]\n"; exit(1); } std::string path = "../examples/data/"; int N; int dummy_data_multiplier; int N_queries; int d; int K; { std::ifstream configfile; configfile.open(path + "/config.txt"); if (!configfile.is_open()) { std::cout << "Cannot open config.txt\n"; return 1; } configfile >> N >> dummy_data_multiplier >> N_queries >> d >> K; printf("Loaded config: N=%d, d_mult=%d, Nq=%d, dim=%d, K=%d\n", N, dummy_data_multiplier, N_queries, d, K); } hnswlib::L2Space l2space(d); hnswlib::HierarchicalNSW appr_alg(&l2space, N + 1, M, efConstruction); std::vector dummy_batch = load_batch(path + "batch_dummy_00.bin", N * d); // Adding enterpoint: appr_alg.addPoint((void *)dummy_batch.data(), (size_t)0); StopW stopw = StopW(); if (update) { std::cout << "Update iteration 0\n"; ParallelFor(1, N, num_threads, [&](size_t i, size_t threadId) { appr_alg.addPoint((void *)(dummy_batch.data() + i * d), i); }); appr_alg.checkIntegrity(); ParallelFor(1, N, num_threads, [&](size_t i, size_t threadId) { appr_alg.addPoint((void *)(dummy_batch.data() + i * d), i); }); appr_alg.checkIntegrity(); for (int b = 1; b < dummy_data_multiplier; b++) { std::cout << "Update iteration " << b << "\n"; char cpath[1024]; sprintf(cpath, "batch_dummy_%02d.bin", b); std::vector dummy_batchb = load_batch(path + cpath, N * d); ParallelFor(0, N, num_threads, [&](size_t i, size_t threadId) { 
appr_alg.addPoint((void *)(dummy_batch.data() + i * d), i); }); appr_alg.checkIntegrity(); } } std::cout << "Inserting final elements\n"; std::vector final_batch = load_batch(path + "batch_final.bin", N * d); stopw.reset(); ParallelFor(0, N, num_threads, [&](size_t i, size_t threadId) { appr_alg.addPoint((void *)(final_batch.data() + i * d), i); }); std::cout<<"Finished. Time taken:" << stopw.getElapsedTimeMicro()*1e-6 << " s\n"; std::cout << "Running tests\n"; std::vector queries_batch = load_batch(path + "queries.bin", N_queries * d); std::vector gt = load_batch(path + "gt.bin", N_queries * K); std::vector> answers(N_queries); for (int i = 0; i < N_queries; i++) { for (int j = 0; j < K; j++) { answers[i].insert(gt[i * K + j]); } } for (int i = 0; i < 3; i++) { std::cout << "Test iteration " << i << "\n"; test_vs_recall(queries_batch, N_queries, appr_alg, d, answers, K); } return 0; };hnswlib-0.6.2/hnswlib/000077500000000000000000000000001420255137300146355ustar00rootroot00000000000000hnswlib-0.6.2/hnswlib/bruteforce.h000066400000000000000000000125601420255137300171520ustar00rootroot00000000000000#pragma once #include #include #include #include namespace hnswlib { template class BruteforceSearch : public AlgorithmInterface { public: BruteforceSearch(SpaceInterface *s) { } BruteforceSearch(SpaceInterface *s, const std::string &location) { loadIndex(location, s); } BruteforceSearch(SpaceInterface *s, size_t maxElements) { maxelements_ = maxElements; data_size_ = s->get_data_size(); fstdistfunc_ = s->get_dist_func(); dist_func_param_ = s->get_dist_func_param(); size_per_element_ = data_size_ + sizeof(labeltype); data_ = (char *) malloc(maxElements * size_per_element_); if (data_ == nullptr) std::runtime_error("Not enough memory: BruteforceSearch failed to allocate data"); cur_element_count = 0; } ~BruteforceSearch() { free(data_); } char *data_; size_t maxelements_; size_t cur_element_count; size_t size_per_element_; size_t data_size_; DISTFUNC fstdistfunc_; void 
*dist_func_param_; std::mutex index_lock; std::unordered_map dict_external_to_internal; void addPoint(const void *datapoint, labeltype label) { int idx; { std::unique_lock lock(index_lock); auto search=dict_external_to_internal.find(label); if (search != dict_external_to_internal.end()) { idx=search->second; } else{ if (cur_element_count >= maxelements_) { throw std::runtime_error("The number of elements exceeds the specified limit\n"); } idx=cur_element_count; dict_external_to_internal[label] = idx; cur_element_count++; } } memcpy(data_ + size_per_element_ * idx + data_size_, &label, sizeof(labeltype)); memcpy(data_ + size_per_element_ * idx, datapoint, data_size_); }; void removePoint(labeltype cur_external) { size_t cur_c=dict_external_to_internal[cur_external]; dict_external_to_internal.erase(cur_external); labeltype label=*((labeltype*)(data_ + size_per_element_ * (cur_element_count-1) + data_size_)); dict_external_to_internal[label]=cur_c; memcpy(data_ + size_per_element_ * cur_c, data_ + size_per_element_ * (cur_element_count-1), data_size_+sizeof(labeltype)); cur_element_count--; } std::priority_queue> searchKnn(const void *query_data, size_t k) const { std::priority_queue> topResults; if (cur_element_count == 0) return topResults; for (int i = 0; i < k; i++) { dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_); topResults.push(std::pair(dist, *((labeltype *) (data_ + size_per_element_ * i + data_size_)))); } dist_t lastdist = topResults.top().first; for (int i = k; i < cur_element_count; i++) { dist_t dist = fstdistfunc_(query_data, data_ + size_per_element_ * i, dist_func_param_); if (dist <= lastdist) { topResults.push(std::pair(dist, *((labeltype *) (data_ + size_per_element_ * i + data_size_)))); if (topResults.size() > k) topResults.pop(); lastdist = topResults.top().first; } } return topResults; }; void saveIndex(const std::string &location) { std::ofstream output(location, std::ios::binary); std::streampos 
position; writeBinaryPOD(output, maxelements_); writeBinaryPOD(output, size_per_element_); writeBinaryPOD(output, cur_element_count); output.write(data_, maxelements_ * size_per_element_); output.close(); } void loadIndex(const std::string &location, SpaceInterface *s) { std::ifstream input(location, std::ios::binary); std::streampos position; readBinaryPOD(input, maxelements_); readBinaryPOD(input, size_per_element_); readBinaryPOD(input, cur_element_count); data_size_ = s->get_data_size(); fstdistfunc_ = s->get_dist_func(); dist_func_param_ = s->get_dist_func_param(); size_per_element_ = data_size_ + sizeof(labeltype); data_ = (char *) malloc(maxelements_ * size_per_element_); if (data_ == nullptr) std::runtime_error("Not enough memory: loadIndex failed to allocate data"); input.read(data_, maxelements_ * size_per_element_); input.close(); } }; } hnswlib-0.6.2/hnswlib/hnswalg.h000066400000000000000000001463731420255137300164670ustar00rootroot00000000000000#pragma once #include "visited_list_pool.h" #include "hnswlib.h" #include #include #include #include #include #include namespace hnswlib { typedef unsigned int tableint; typedef unsigned int linklistsizeint; template class HierarchicalNSW : public AlgorithmInterface { public: static const tableint max_update_element_locks = 65536; HierarchicalNSW(SpaceInterface *s) { } HierarchicalNSW(SpaceInterface *s, const std::string &location, bool nmslib = false, size_t max_elements=0) { loadIndex(location, s, max_elements); } HierarchicalNSW(SpaceInterface *s, size_t max_elements, size_t M = 16, size_t ef_construction = 200, size_t random_seed = 100) : link_list_locks_(max_elements), link_list_update_locks_(max_update_element_locks), element_levels_(max_elements) { max_elements_ = max_elements; num_deleted_ = 0; data_size_ = s->get_data_size(); fstdistfunc_ = s->get_dist_func(); dist_func_param_ = s->get_dist_func_param(); M_ = M; maxM_ = M_; maxM0_ = M_ * 2; ef_construction_ = std::max(ef_construction,M_); ef_ = 10; 
level_generator_.seed(random_seed); update_probability_generator_.seed(random_seed + 1); size_links_level0_ = maxM0_ * sizeof(tableint) + sizeof(linklistsizeint); size_data_per_element_ = size_links_level0_ + data_size_ + sizeof(labeltype); offsetData_ = size_links_level0_; label_offset_ = size_links_level0_ + data_size_; offsetLevel0_ = 0; data_level0_memory_ = (char *) malloc(max_elements_ * size_data_per_element_); if (data_level0_memory_ == nullptr) throw std::runtime_error("Not enough memory"); cur_element_count = 0; visited_list_pool_ = new VisitedListPool(1, max_elements); //initializations for special treatment of the first node enterpoint_node_ = -1; maxlevel_ = -1; linkLists_ = (char **) malloc(sizeof(void *) * max_elements_); if (linkLists_ == nullptr) throw std::runtime_error("Not enough memory: HierarchicalNSW failed to allocate linklists"); size_links_per_element_ = maxM_ * sizeof(tableint) + sizeof(linklistsizeint); mult_ = 1 / log(1.0 * M_); revSize_ = 1.0 / mult_; } struct CompareByFirst { constexpr bool operator()(std::pair const &a, std::pair const &b) const noexcept { return a.first < b.first; } }; ~HierarchicalNSW() { free(data_level0_memory_); for (tableint i = 0; i < cur_element_count; i++) { if (element_levels_[i] > 0) free(linkLists_[i]); } free(linkLists_); delete visited_list_pool_; } size_t max_elements_; size_t cur_element_count; size_t size_data_per_element_; size_t size_links_per_element_; size_t num_deleted_; size_t M_; size_t maxM_; size_t maxM0_; size_t ef_construction_; double mult_, revSize_; int maxlevel_; VisitedListPool *visited_list_pool_; std::mutex cur_element_count_guard_; std::vector link_list_locks_; // Locks to prevent race condition during update/insert of an element at same time. // Note: Locks for additions can also be used to prevent this race condition if the querying of KNN is not exposed along with update/inserts i.e multithread insert/update/query in parallel. 
std::vector link_list_update_locks_; tableint enterpoint_node_; size_t size_links_level0_; size_t offsetData_, offsetLevel0_; char *data_level0_memory_; char **linkLists_; std::vector element_levels_; size_t data_size_; size_t label_offset_; DISTFUNC fstdistfunc_; void *dist_func_param_; std::unordered_map label_lookup_; std::default_random_engine level_generator_; std::default_random_engine update_probability_generator_; inline labeltype getExternalLabel(tableint internal_id) const { labeltype return_label; memcpy(&return_label,(data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_), sizeof(labeltype)); return return_label; } inline void setExternalLabel(tableint internal_id, labeltype label) const { memcpy((data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_), &label, sizeof(labeltype)); } inline labeltype *getExternalLabeLp(tableint internal_id) const { return (labeltype *) (data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_); } inline char *getDataByInternalId(tableint internal_id) const { return (data_level0_memory_ + internal_id * size_data_per_element_ + offsetData_); } int getRandomLevel(double reverse_size) { std::uniform_real_distribution distribution(0.0, 1.0); double r = -log(distribution(level_generator_)) * reverse_size; return (int) r; } std::priority_queue, std::vector>, CompareByFirst> searchBaseLayer(tableint ep_id, const void *data_point, int layer) { VisitedList *vl = visited_list_pool_->getFreeVisitedList(); vl_type *visited_array = vl->mass; vl_type visited_array_tag = vl->curV; std::priority_queue, std::vector>, CompareByFirst> top_candidates; std::priority_queue, std::vector>, CompareByFirst> candidateSet; dist_t lowerBound; if (!isMarkedDeleted(ep_id)) { dist_t dist = fstdistfunc_(data_point, getDataByInternalId(ep_id), dist_func_param_); top_candidates.emplace(dist, ep_id); lowerBound = dist; candidateSet.emplace(-dist, ep_id); } else { lowerBound = 
std::numeric_limits::max(); candidateSet.emplace(-lowerBound, ep_id); } visited_array[ep_id] = visited_array_tag; while (!candidateSet.empty()) { std::pair curr_el_pair = candidateSet.top(); if ((-curr_el_pair.first) > lowerBound && top_candidates.size() == ef_construction_) { break; } candidateSet.pop(); tableint curNodeNum = curr_el_pair.second; std::unique_lock lock(link_list_locks_[curNodeNum]); int *data;// = (int *)(linkList0_ + curNodeNum * size_links_per_element0_); if (layer == 0) { data = (int*)get_linklist0(curNodeNum); } else { data = (int*)get_linklist(curNodeNum, layer); // data = (int *) (linkLists_[curNodeNum] + (layer - 1) * size_links_per_element_); } size_t size = getListCount((linklistsizeint*)data); tableint *datal = (tableint *) (data + 1); #ifdef USE_SSE _mm_prefetch((char *) (visited_array + *(data + 1)), _MM_HINT_T0); _mm_prefetch((char *) (visited_array + *(data + 1) + 64), _MM_HINT_T0); _mm_prefetch(getDataByInternalId(*datal), _MM_HINT_T0); _mm_prefetch(getDataByInternalId(*(datal + 1)), _MM_HINT_T0); #endif for (size_t j = 0; j < size; j++) { tableint candidate_id = *(datal + j); // if (candidate_id == 0) continue; #ifdef USE_SSE _mm_prefetch((char *) (visited_array + *(datal + j + 1)), _MM_HINT_T0); _mm_prefetch(getDataByInternalId(*(datal + j + 1)), _MM_HINT_T0); #endif if (visited_array[candidate_id] == visited_array_tag) continue; visited_array[candidate_id] = visited_array_tag; char *currObj1 = (getDataByInternalId(candidate_id)); dist_t dist1 = fstdistfunc_(data_point, currObj1, dist_func_param_); if (top_candidates.size() < ef_construction_ || lowerBound > dist1) { candidateSet.emplace(-dist1, candidate_id); #ifdef USE_SSE _mm_prefetch(getDataByInternalId(candidateSet.top().second), _MM_HINT_T0); #endif if (!isMarkedDeleted(candidate_id)) top_candidates.emplace(dist1, candidate_id); if (top_candidates.size() > ef_construction_) top_candidates.pop(); if (!top_candidates.empty()) lowerBound = top_candidates.top().first; } } } 
visited_list_pool_->releaseVisitedList(vl); return top_candidates; } mutable std::atomic metric_distance_computations; mutable std::atomic metric_hops; template std::priority_queue, std::vector>, CompareByFirst> searchBaseLayerST(tableint ep_id, const void *data_point, size_t ef) const { VisitedList *vl = visited_list_pool_->getFreeVisitedList(); vl_type *visited_array = vl->mass; vl_type visited_array_tag = vl->curV; std::priority_queue, std::vector>, CompareByFirst> top_candidates; std::priority_queue, std::vector>, CompareByFirst> candidate_set; dist_t lowerBound; if (!has_deletions || !isMarkedDeleted(ep_id)) { dist_t dist = fstdistfunc_(data_point, getDataByInternalId(ep_id), dist_func_param_); lowerBound = dist; top_candidates.emplace(dist, ep_id); candidate_set.emplace(-dist, ep_id); } else { lowerBound = std::numeric_limits::max(); candidate_set.emplace(-lowerBound, ep_id); } visited_array[ep_id] = visited_array_tag; while (!candidate_set.empty()) { std::pair current_node_pair = candidate_set.top(); if ((-current_node_pair.first) > lowerBound && (top_candidates.size() == ef || has_deletions == false)) { break; } candidate_set.pop(); tableint current_node_id = current_node_pair.second; int *data = (int *) get_linklist0(current_node_id); size_t size = getListCount((linklistsizeint*)data); // bool cur_node_deleted = isMarkedDeleted(current_node_id); if(collect_metrics){ metric_hops++; metric_distance_computations+=size; } #ifdef USE_SSE _mm_prefetch((char *) (visited_array + *(data + 1)), _MM_HINT_T0); _mm_prefetch((char *) (visited_array + *(data + 1) + 64), _MM_HINT_T0); _mm_prefetch(data_level0_memory_ + (*(data + 1)) * size_data_per_element_ + offsetData_, _MM_HINT_T0); _mm_prefetch((char *) (data + 2), _MM_HINT_T0); #endif for (size_t j = 1; j <= size; j++) { int candidate_id = *(data + j); // if (candidate_id == 0) continue; #ifdef USE_SSE _mm_prefetch((char *) (visited_array + *(data + j + 1)), _MM_HINT_T0); _mm_prefetch(data_level0_memory_ + (*(data + 
j + 1)) * size_data_per_element_ + offsetData_, _MM_HINT_T0);//////////// #endif if (!(visited_array[candidate_id] == visited_array_tag)) { visited_array[candidate_id] = visited_array_tag; char *currObj1 = (getDataByInternalId(candidate_id)); dist_t dist = fstdistfunc_(data_point, currObj1, dist_func_param_); if (top_candidates.size() < ef || lowerBound > dist) { candidate_set.emplace(-dist, candidate_id); #ifdef USE_SSE _mm_prefetch(data_level0_memory_ + candidate_set.top().second * size_data_per_element_ + offsetLevel0_,/////////// _MM_HINT_T0);//////////////////////// #endif if (!has_deletions || !isMarkedDeleted(candidate_id)) top_candidates.emplace(dist, candidate_id); if (top_candidates.size() > ef) top_candidates.pop(); if (!top_candidates.empty()) lowerBound = top_candidates.top().first; } } } } visited_list_pool_->releaseVisitedList(vl); return top_candidates; } void getNeighborsByHeuristic2( std::priority_queue, std::vector>, CompareByFirst> &top_candidates, const size_t M) { if (top_candidates.size() < M) { return; } std::priority_queue> queue_closest; std::vector> return_list; while (top_candidates.size() > 0) { queue_closest.emplace(-top_candidates.top().first, top_candidates.top().second); top_candidates.pop(); } while (queue_closest.size()) { if (return_list.size() >= M) break; std::pair curent_pair = queue_closest.top(); dist_t dist_to_query = -curent_pair.first; queue_closest.pop(); bool good = true; for (std::pair second_pair : return_list) { dist_t curdist = fstdistfunc_(getDataByInternalId(second_pair.second), getDataByInternalId(curent_pair.second), dist_func_param_);; if (curdist < dist_to_query) { good = false; break; } } if (good) { return_list.push_back(curent_pair); } } for (std::pair curent_pair : return_list) { top_candidates.emplace(-curent_pair.first, curent_pair.second); } } linklistsizeint *get_linklist0(tableint internal_id) const { return (linklistsizeint *) (data_level0_memory_ + internal_id * size_data_per_element_ + 
offsetLevel0_); }; linklistsizeint *get_linklist0(tableint internal_id, char *data_level0_memory_) const { return (linklistsizeint *) (data_level0_memory_ + internal_id * size_data_per_element_ + offsetLevel0_); }; linklistsizeint *get_linklist(tableint internal_id, int level) const { return (linklistsizeint *) (linkLists_[internal_id] + (level - 1) * size_links_per_element_); }; linklistsizeint *get_linklist_at_level(tableint internal_id, int level) const { return level == 0 ? get_linklist0(internal_id) : get_linklist(internal_id, level); }; tableint mutuallyConnectNewElement(const void *data_point, tableint cur_c, std::priority_queue, std::vector>, CompareByFirst> &top_candidates, int level, bool isUpdate) { size_t Mcurmax = level ? maxM_ : maxM0_; getNeighborsByHeuristic2(top_candidates, M_); if (top_candidates.size() > M_) throw std::runtime_error("Should be not be more than M_ candidates returned by the heuristic"); std::vector selectedNeighbors; selectedNeighbors.reserve(M_); while (top_candidates.size() > 0) { selectedNeighbors.push_back(top_candidates.top().second); top_candidates.pop(); } tableint next_closest_entry_point = selectedNeighbors.back(); { linklistsizeint *ll_cur; if (level == 0) ll_cur = get_linklist0(cur_c); else ll_cur = get_linklist(cur_c, level); if (*ll_cur && !isUpdate) { throw std::runtime_error("The newly inserted element should have blank link list"); } setListCount(ll_cur,selectedNeighbors.size()); tableint *data = (tableint *) (ll_cur + 1); for (size_t idx = 0; idx < selectedNeighbors.size(); idx++) { if (data[idx] && !isUpdate) throw std::runtime_error("Possible memory corruption"); if (level > element_levels_[selectedNeighbors[idx]]) throw std::runtime_error("Trying to make a link on a non-existent level"); data[idx] = selectedNeighbors[idx]; } } for (size_t idx = 0; idx < selectedNeighbors.size(); idx++) { std::unique_lock lock(link_list_locks_[selectedNeighbors[idx]]); linklistsizeint *ll_other; if (level == 0) ll_other = 
get_linklist0(selectedNeighbors[idx]); else ll_other = get_linklist(selectedNeighbors[idx], level); size_t sz_link_list_other = getListCount(ll_other); if (sz_link_list_other > Mcurmax) throw std::runtime_error("Bad value of sz_link_list_other"); if (selectedNeighbors[idx] == cur_c) throw std::runtime_error("Trying to connect an element to itself"); if (level > element_levels_[selectedNeighbors[idx]]) throw std::runtime_error("Trying to make a link on a non-existent level"); tableint *data = (tableint *) (ll_other + 1); bool is_cur_c_present = false; if (isUpdate) { for (size_t j = 0; j < sz_link_list_other; j++) { if (data[j] == cur_c) { is_cur_c_present = true; break; } } } // If cur_c is already present in the neighboring connections of `selectedNeighbors[idx]` then no need to modify any connections or run the heuristics. if (!is_cur_c_present) { if (sz_link_list_other < Mcurmax) { data[sz_link_list_other] = cur_c; setListCount(ll_other, sz_link_list_other + 1); } else { // finding the "weakest" element to replace it with the new one dist_t d_max = fstdistfunc_(getDataByInternalId(cur_c), getDataByInternalId(selectedNeighbors[idx]), dist_func_param_); // Heuristic: std::priority_queue, std::vector>, CompareByFirst> candidates; candidates.emplace(d_max, cur_c); for (size_t j = 0; j < sz_link_list_other; j++) { candidates.emplace( fstdistfunc_(getDataByInternalId(data[j]), getDataByInternalId(selectedNeighbors[idx]), dist_func_param_), data[j]); } getNeighborsByHeuristic2(candidates, Mcurmax); int indx = 0; while (candidates.size() > 0) { data[indx] = candidates.top().second; candidates.pop(); indx++; } setListCount(ll_other, indx); // Nearest K: /*int indx = -1; for (int j = 0; j < sz_link_list_other; j++) { dist_t d = fstdistfunc_(getDataByInternalId(data[j]), getDataByInternalId(rez[idx]), dist_func_param_); if (d > d_max) { indx = j; d_max = d; } } if (indx >= 0) { data[indx] = cur_c; } */ } } } return next_closest_entry_point; } std::mutex global; size_t ef_; 
void setEf(size_t ef) { ef_ = ef; } std::priority_queue> searchKnnInternal(void *query_data, int k) { std::priority_queue> top_candidates; if (cur_element_count == 0) return top_candidates; tableint currObj = enterpoint_node_; dist_t curdist = fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_); for (size_t level = maxlevel_; level > 0; level--) { bool changed = true; while (changed) { changed = false; int *data; data = (int *) get_linklist(currObj,level); int size = getListCount(data); tableint *datal = (tableint *) (data + 1); for (int i = 0; i < size; i++) { tableint cand = datal[i]; if (cand < 0 || cand > max_elements_) throw std::runtime_error("cand error"); dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); if (d < curdist) { curdist = d; currObj = cand; changed = true; } } } } if (num_deleted_) { std::priority_queue> top_candidates1=searchBaseLayerST(currObj, query_data, ef_); top_candidates.swap(top_candidates1); } else{ std::priority_queue> top_candidates1=searchBaseLayerST(currObj, query_data, ef_); top_candidates.swap(top_candidates1); } while (top_candidates.size() > k) { top_candidates.pop(); } return top_candidates; }; void resizeIndex(size_t new_max_elements){ if (new_max_elements(new_max_elements).swap(link_list_locks_); // Reallocate base layer char * data_level0_memory_new = (char *) realloc(data_level0_memory_, new_max_elements * size_data_per_element_); if (data_level0_memory_new == nullptr) throw std::runtime_error("Not enough memory: resizeIndex failed to allocate base layer"); data_level0_memory_ = data_level0_memory_new; // Reallocate all other layers char ** linkLists_new = (char **) realloc(linkLists_, sizeof(void *) * new_max_elements); if (linkLists_new == nullptr) throw std::runtime_error("Not enough memory: resizeIndex failed to allocate other layers"); linkLists_ = linkLists_new; max_elements_ = new_max_elements; } void saveIndex(const std::string &location) { std::ofstream 
output(location, std::ios::binary); std::streampos position; writeBinaryPOD(output, offsetLevel0_); writeBinaryPOD(output, max_elements_); writeBinaryPOD(output, cur_element_count); writeBinaryPOD(output, size_data_per_element_); writeBinaryPOD(output, label_offset_); writeBinaryPOD(output, offsetData_); writeBinaryPOD(output, maxlevel_); writeBinaryPOD(output, enterpoint_node_); writeBinaryPOD(output, maxM_); writeBinaryPOD(output, maxM0_); writeBinaryPOD(output, M_); writeBinaryPOD(output, mult_); writeBinaryPOD(output, ef_construction_); output.write(data_level0_memory_, cur_element_count * size_data_per_element_); for (size_t i = 0; i < cur_element_count; i++) { unsigned int linkListSize = element_levels_[i] > 0 ? size_links_per_element_ * element_levels_[i] : 0; writeBinaryPOD(output, linkListSize); if (linkListSize) output.write(linkLists_[i], linkListSize); } output.close(); } void loadIndex(const std::string &location, SpaceInterface *s, size_t max_elements_i=0) { std::ifstream input(location, std::ios::binary); if (!input.is_open()) throw std::runtime_error("Cannot open file"); // get file size: input.seekg(0,input.end); std::streampos total_filesize=input.tellg(); input.seekg(0,input.beg); readBinaryPOD(input, offsetLevel0_); readBinaryPOD(input, max_elements_); readBinaryPOD(input, cur_element_count); size_t max_elements = max_elements_i; if(max_elements < cur_element_count) max_elements = max_elements_; max_elements_ = max_elements; readBinaryPOD(input, size_data_per_element_); readBinaryPOD(input, label_offset_); readBinaryPOD(input, offsetData_); readBinaryPOD(input, maxlevel_); readBinaryPOD(input, enterpoint_node_); readBinaryPOD(input, maxM_); readBinaryPOD(input, maxM0_); readBinaryPOD(input, M_); readBinaryPOD(input, mult_); readBinaryPOD(input, ef_construction_); data_size_ = s->get_data_size(); fstdistfunc_ = s->get_dist_func(); dist_func_param_ = s->get_dist_func_param(); auto pos=input.tellg(); /// Optional - check if index is ok: 
input.seekg(cur_element_count * size_data_per_element_,input.cur); for (size_t i = 0; i < cur_element_count; i++) { if(input.tellg() < 0 || input.tellg()>=total_filesize){ throw std::runtime_error("Index seems to be corrupted or unsupported"); } unsigned int linkListSize; readBinaryPOD(input, linkListSize); if (linkListSize != 0) { input.seekg(linkListSize,input.cur); } } // throw exception if it either corrupted or old index if(input.tellg()!=total_filesize) throw std::runtime_error("Index seems to be corrupted or unsupported"); input.clear(); /// Optional check end input.seekg(pos,input.beg); data_level0_memory_ = (char *) malloc(max_elements * size_data_per_element_); if (data_level0_memory_ == nullptr) throw std::runtime_error("Not enough memory: loadIndex failed to allocate level0"); input.read(data_level0_memory_, cur_element_count * size_data_per_element_); size_links_per_element_ = maxM_ * sizeof(tableint) + sizeof(linklistsizeint); size_links_level0_ = maxM0_ * sizeof(tableint) + sizeof(linklistsizeint); std::vector(max_elements).swap(link_list_locks_); std::vector(max_update_element_locks).swap(link_list_update_locks_); visited_list_pool_ = new VisitedListPool(1, max_elements); linkLists_ = (char **) malloc(sizeof(void *) * max_elements); if (linkLists_ == nullptr) throw std::runtime_error("Not enough memory: loadIndex failed to allocate linklists"); element_levels_ = std::vector(max_elements); revSize_ = 1.0 / mult_; ef_ = 10; for (size_t i = 0; i < cur_element_count; i++) { label_lookup_[getExternalLabel(i)]=i; unsigned int linkListSize; readBinaryPOD(input, linkListSize); if (linkListSize == 0) { element_levels_[i] = 0; linkLists_[i] = nullptr; } else { element_levels_[i] = linkListSize / size_links_per_element_; linkLists_[i] = (char *) malloc(linkListSize); if (linkLists_[i] == nullptr) throw std::runtime_error("Not enough memory: loadIndex failed to allocate linklist"); input.read(linkLists_[i], linkListSize); } } for (size_t i = 0; i < 
cur_element_count; i++) { if(isMarkedDeleted(i)) num_deleted_ += 1; } input.close(); return; } template std::vector getDataByLabel(labeltype label) const { tableint label_c; auto search = label_lookup_.find(label); if (search == label_lookup_.end() || isMarkedDeleted(search->second)) { throw std::runtime_error("Label not found"); } label_c = search->second; char* data_ptrv = getDataByInternalId(label_c); size_t dim = *((size_t *) dist_func_param_); std::vector data; data_t* data_ptr = (data_t*) data_ptrv; for (int i = 0; i < dim; i++) { data.push_back(*data_ptr); data_ptr += 1; } return data; } static const unsigned char DELETE_MARK = 0x01; // static const unsigned char REUSE_MARK = 0x10; /** * Marks an element with the given label deleted, does NOT really change the current graph. * @param label */ void markDelete(labeltype label) { auto search = label_lookup_.find(label); if (search == label_lookup_.end()) { throw std::runtime_error("Label not found"); } tableint internalId = search->second; markDeletedInternal(internalId); } /** * Uses the first 8 bits of the memory for the linked list to store the mark, * whereas maxM0_ has to be limited to the lower 24 bits, however, still large enough in almost all cases. * @param internalId */ void markDeletedInternal(tableint internalId) { assert(internalId < cur_element_count); if (!isMarkedDeleted(internalId)) { unsigned char *ll_cur = ((unsigned char *)get_linklist0(internalId))+2; *ll_cur |= DELETE_MARK; num_deleted_ += 1; } else { throw std::runtime_error("The requested to delete element is already deleted"); } } /** * Remove the deleted mark of the node, does NOT really change the current graph. * @param label */ void unmarkDelete(labeltype label) { auto search = label_lookup_.find(label); if (search == label_lookup_.end()) { throw std::runtime_error("Label not found"); } tableint internalId = search->second; unmarkDeletedInternal(internalId); } /** * Remove the deleted mark of the node. 
* @param internalId */ void unmarkDeletedInternal(tableint internalId) { assert(internalId < cur_element_count); if (isMarkedDeleted(internalId)) { unsigned char *ll_cur = ((unsigned char *)get_linklist0(internalId))+2; *ll_cur &= ~DELETE_MARK; num_deleted_ -= 1; } else { throw std::runtime_error("The requested to undelete element is not deleted"); } } /** * Checks the first 8 bits of the memory to see if the element is marked deleted. * @param internalId * @return */ bool isMarkedDeleted(tableint internalId) const { unsigned char *ll_cur = ((unsigned char*)get_linklist0(internalId))+2; return *ll_cur & DELETE_MARK; } unsigned short int getListCount(linklistsizeint * ptr) const { return *((unsigned short int *)ptr); } void setListCount(linklistsizeint * ptr, unsigned short int size) const { *((unsigned short int*)(ptr))=*((unsigned short int *)&size); } void addPoint(const void *data_point, labeltype label) { addPoint(data_point, label,-1); } void updatePoint(const void *dataPoint, tableint internalId, float updateNeighborProbability) { // update the feature vector associated with existing point with new vector memcpy(getDataByInternalId(internalId), dataPoint, data_size_); int maxLevelCopy = maxlevel_; tableint entryPointCopy = enterpoint_node_; // If point to be updated is entry point and graph just contains single element then just return. 
if (entryPointCopy == internalId && cur_element_count == 1) return; int elemLevel = element_levels_[internalId]; std::uniform_real_distribution distribution(0.0, 1.0); for (int layer = 0; layer <= elemLevel; layer++) { std::unordered_set sCand; std::unordered_set sNeigh; std::vector listOneHop = getConnectionsWithLock(internalId, layer); if (listOneHop.size() == 0) continue; sCand.insert(internalId); for (auto&& elOneHop : listOneHop) { sCand.insert(elOneHop); if (distribution(update_probability_generator_) > updateNeighborProbability) continue; sNeigh.insert(elOneHop); std::vector listTwoHop = getConnectionsWithLock(elOneHop, layer); for (auto&& elTwoHop : listTwoHop) { sCand.insert(elTwoHop); } } for (auto&& neigh : sNeigh) { // if (neigh == internalId) // continue; std::priority_queue, std::vector>, CompareByFirst> candidates; size_t size = sCand.find(neigh) == sCand.end() ? sCand.size() : sCand.size() - 1; // sCand guaranteed to have size >= 1 size_t elementsToKeep = std::min(ef_construction_, size); for (auto&& cand : sCand) { if (cand == neigh) continue; dist_t distance = fstdistfunc_(getDataByInternalId(neigh), getDataByInternalId(cand), dist_func_param_); if (candidates.size() < elementsToKeep) { candidates.emplace(distance, cand); } else { if (distance < candidates.top().first) { candidates.pop(); candidates.emplace(distance, cand); } } } // Retrieve neighbours using heuristic and set connections. getNeighborsByHeuristic2(candidates, layer == 0 ? 
maxM0_ : maxM_); { std::unique_lock lock(link_list_locks_[neigh]); linklistsizeint *ll_cur; ll_cur = get_linklist_at_level(neigh, layer); size_t candSize = candidates.size(); setListCount(ll_cur, candSize); tableint *data = (tableint *) (ll_cur + 1); for (size_t idx = 0; idx < candSize; idx++) { data[idx] = candidates.top().second; candidates.pop(); } } } } repairConnectionsForUpdate(dataPoint, entryPointCopy, internalId, elemLevel, maxLevelCopy); }; void repairConnectionsForUpdate(const void *dataPoint, tableint entryPointInternalId, tableint dataPointInternalId, int dataPointLevel, int maxLevel) { tableint currObj = entryPointInternalId; if (dataPointLevel < maxLevel) { dist_t curdist = fstdistfunc_(dataPoint, getDataByInternalId(currObj), dist_func_param_); for (int level = maxLevel; level > dataPointLevel; level--) { bool changed = true; while (changed) { changed = false; unsigned int *data; std::unique_lock lock(link_list_locks_[currObj]); data = get_linklist_at_level(currObj,level); int size = getListCount(data); tableint *datal = (tableint *) (data + 1); #ifdef USE_SSE _mm_prefetch(getDataByInternalId(*datal), _MM_HINT_T0); #endif for (int i = 0; i < size; i++) { #ifdef USE_SSE _mm_prefetch(getDataByInternalId(*(datal + i + 1)), _MM_HINT_T0); #endif tableint cand = datal[i]; dist_t d = fstdistfunc_(dataPoint, getDataByInternalId(cand), dist_func_param_); if (d < curdist) { curdist = d; currObj = cand; changed = true; } } } } } if (dataPointLevel > maxLevel) throw std::runtime_error("Level of item to be updated cannot be bigger than max level"); for (int level = dataPointLevel; level >= 0; level--) { std::priority_queue, std::vector>, CompareByFirst> topCandidates = searchBaseLayer( currObj, dataPoint, level); std::priority_queue, std::vector>, CompareByFirst> filteredTopCandidates; while (topCandidates.size() > 0) { if (topCandidates.top().second != dataPointInternalId) filteredTopCandidates.push(topCandidates.top()); topCandidates.pop(); } // Since 
element_levels_ is being used to get `dataPointLevel`, there could be cases where `topCandidates` could just contains entry point itself. // To prevent self loops, the `topCandidates` is filtered and thus can be empty. if (filteredTopCandidates.size() > 0) { bool epDeleted = isMarkedDeleted(entryPointInternalId); if (epDeleted) { filteredTopCandidates.emplace(fstdistfunc_(dataPoint, getDataByInternalId(entryPointInternalId), dist_func_param_), entryPointInternalId); if (filteredTopCandidates.size() > ef_construction_) filteredTopCandidates.pop(); } currObj = mutuallyConnectNewElement(dataPoint, dataPointInternalId, filteredTopCandidates, level, true); } } } std::vector getConnectionsWithLock(tableint internalId, int level) { std::unique_lock lock(link_list_locks_[internalId]); unsigned int *data = get_linklist_at_level(internalId, level); int size = getListCount(data); std::vector result(size); tableint *ll = (tableint *) (data + 1); memcpy(result.data(), ll,size * sizeof(tableint)); return result; }; tableint addPoint(const void *data_point, labeltype label, int level) { tableint cur_c = 0; { // Checking if the element with the same label already exists // if so, updating it *instead* of creating a new element. 
std::unique_lock templock_curr(cur_element_count_guard_); auto search = label_lookup_.find(label); if (search != label_lookup_.end()) { tableint existingInternalId = search->second; templock_curr.unlock(); std::unique_lock lock_el_update(link_list_update_locks_[(existingInternalId & (max_update_element_locks - 1))]); if (isMarkedDeleted(existingInternalId)) { unmarkDeletedInternal(existingInternalId); } updatePoint(data_point, existingInternalId, 1.0); return existingInternalId; } if (cur_element_count >= max_elements_) { throw std::runtime_error("The number of elements exceeds the specified limit"); }; cur_c = cur_element_count; cur_element_count++; label_lookup_[label] = cur_c; } // Take update lock to prevent race conditions on an element with insertion/update at the same time. std::unique_lock lock_el_update(link_list_update_locks_[(cur_c & (max_update_element_locks - 1))]); std::unique_lock lock_el(link_list_locks_[cur_c]); int curlevel = getRandomLevel(mult_); if (level > 0) curlevel = level; element_levels_[cur_c] = curlevel; std::unique_lock templock(global); int maxlevelcopy = maxlevel_; if (curlevel <= maxlevelcopy) templock.unlock(); tableint currObj = enterpoint_node_; tableint enterpoint_copy = enterpoint_node_; memset(data_level0_memory_ + cur_c * size_data_per_element_ + offsetLevel0_, 0, size_data_per_element_); // Initialisation of the data and label memcpy(getExternalLabeLp(cur_c), &label, sizeof(labeltype)); memcpy(getDataByInternalId(cur_c), data_point, data_size_); if (curlevel) { linkLists_[cur_c] = (char *) malloc(size_links_per_element_ * curlevel + 1); if (linkLists_[cur_c] == nullptr) throw std::runtime_error("Not enough memory: addPoint failed to allocate linklist"); memset(linkLists_[cur_c], 0, size_links_per_element_ * curlevel + 1); } if ((signed)currObj != -1) { if (curlevel < maxlevelcopy) { dist_t curdist = fstdistfunc_(data_point, getDataByInternalId(currObj), dist_func_param_); for (int level = maxlevelcopy; level > curlevel; 
level--) { bool changed = true; while (changed) { changed = false; unsigned int *data; std::unique_lock lock(link_list_locks_[currObj]); data = get_linklist(currObj,level); int size = getListCount(data); tableint *datal = (tableint *) (data + 1); for (int i = 0; i < size; i++) { tableint cand = datal[i]; if (cand < 0 || cand > max_elements_) throw std::runtime_error("cand error"); dist_t d = fstdistfunc_(data_point, getDataByInternalId(cand), dist_func_param_); if (d < curdist) { curdist = d; currObj = cand; changed = true; } } } } } bool epDeleted = isMarkedDeleted(enterpoint_copy); for (int level = std::min(curlevel, maxlevelcopy); level >= 0; level--) { if (level > maxlevelcopy || level < 0) // possible? throw std::runtime_error("Level error"); std::priority_queue, std::vector>, CompareByFirst> top_candidates = searchBaseLayer( currObj, data_point, level); if (epDeleted) { top_candidates.emplace(fstdistfunc_(data_point, getDataByInternalId(enterpoint_copy), dist_func_param_), enterpoint_copy); if (top_candidates.size() > ef_construction_) top_candidates.pop(); } currObj = mutuallyConnectNewElement(data_point, cur_c, top_candidates, level, false); } } else { // Do nothing for the first element enterpoint_node_ = 0; maxlevel_ = curlevel; } //Releasing lock for the maximum level if (curlevel > maxlevelcopy) { enterpoint_node_ = cur_c; maxlevel_ = curlevel; } return cur_c; }; std::priority_queue> searchKnn(const void *query_data, size_t k) const { std::priority_queue> result; if (cur_element_count == 0) return result; tableint currObj = enterpoint_node_; dist_t curdist = fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_); for (int level = maxlevel_; level > 0; level--) { bool changed = true; while (changed) { changed = false; unsigned int *data; data = (unsigned int *) get_linklist(currObj, level); int size = getListCount(data); metric_hops++; metric_distance_computations+=size; tableint *datal = (tableint *) (data + 1); for (int i = 
0; i < size; i++) { tableint cand = datal[i]; if (cand < 0 || cand > max_elements_) throw std::runtime_error("cand error"); dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); if (d < curdist) { curdist = d; currObj = cand; changed = true; } } } } std::priority_queue, std::vector>, CompareByFirst> top_candidates; if (num_deleted_) { top_candidates=searchBaseLayerST( currObj, query_data, std::max(ef_, k)); } else{ top_candidates=searchBaseLayerST( currObj, query_data, std::max(ef_, k)); } while (top_candidates.size() > k) { top_candidates.pop(); } while (top_candidates.size() > 0) { std::pair rez = top_candidates.top(); result.push(std::pair(rez.first, getExternalLabel(rez.second))); top_candidates.pop(); } return result; }; void checkIntegrity(){ int connections_checked=0; std::vector inbound_connections_num(cur_element_count,0); for(int i = 0;i < cur_element_count; i++){ for(int l = 0;l <= element_levels_[i]; l++){ linklistsizeint *ll_cur = get_linklist_at_level(i,l); int size = getListCount(ll_cur); tableint *data = (tableint *) (ll_cur + 1); std::unordered_set s; for (int j=0; j 0); assert(data[j] < cur_element_count); assert (data[j] != i); inbound_connections_num[data[j]]++; s.insert(data[j]); connections_checked++; } assert(s.size() == size); } } if(cur_element_count > 1){ int min1=inbound_connections_num[0], max1=inbound_connections_num[0]; for(int i=0; i < cur_element_count; i++){ assert(inbound_connections_num[i] > 0); min1=std::min(inbound_connections_num[i],min1); max1=std::max(inbound_connections_num[i],max1); } std::cout << "Min inbound: " << min1 << ", Max inbound:" << max1 << "\n"; } std::cout << "integrity ok, checked " << connections_checked << " connections\n"; } }; } hnswlib-0.6.2/hnswlib/hnswlib.h000066400000000000000000000114701420255137300164570ustar00rootroot00000000000000#pragma once #ifndef NO_MANUAL_VECTORIZATION #ifdef __SSE__ #define USE_SSE #ifdef __AVX__ #define USE_AVX #ifdef __AVX512F__ #define 
USE_AVX512 #endif #endif #endif #endif #if defined(USE_AVX) || defined(USE_SSE) #ifdef _MSC_VER #include #include #include "cpu_x86.h" void cpu_x86::cpuid(int32_t out[4], int32_t eax, int32_t ecx) { __cpuidex(out, eax, ecx); } __int64 xgetbv(unsigned int x) { return _xgetbv(x); } #else #include #include #include void cpuid(int32_t cpuInfo[4], int32_t eax, int32_t ecx) { __cpuid_count(eax, ecx, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]); } uint64_t xgetbv(unsigned int index) { uint32_t eax, edx; __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); return ((uint64_t)edx << 32) | eax; } #endif #if defined(USE_AVX512) #include #endif #if defined(__GNUC__) #define PORTABLE_ALIGN32 __attribute__((aligned(32))) #define PORTABLE_ALIGN64 __attribute__((aligned(64))) #else #define PORTABLE_ALIGN32 __declspec(align(32)) #define PORTABLE_ALIGN64 __declspec(align(64)) #endif // Adapted from https://github.com/Mysticial/FeatureDetector #define _XCR_XFEATURE_ENABLED_MASK 0 bool AVXCapable() { int cpuInfo[4]; // CPU support cpuid(cpuInfo, 0, 0); int nIds = cpuInfo[0]; bool HW_AVX = false; if (nIds >= 0x00000001) { cpuid(cpuInfo, 0x00000001, 0); HW_AVX = (cpuInfo[2] & ((int)1 << 28)) != 0; } // OS support cpuid(cpuInfo, 1, 0); bool osUsesXSAVE_XRSTORE = (cpuInfo[2] & (1 << 27)) != 0; bool cpuAVXSuport = (cpuInfo[2] & (1 << 28)) != 0; bool avxSupported = false; if (osUsesXSAVE_XRSTORE && cpuAVXSuport) { uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK); avxSupported = (xcrFeatureMask & 0x6) == 0x6; } return HW_AVX && avxSupported; } bool AVX512Capable() { if (!AVXCapable()) return false; int cpuInfo[4]; // CPU support cpuid(cpuInfo, 0, 0); int nIds = cpuInfo[0]; bool HW_AVX512F = false; if (nIds >= 0x00000007) { // AVX512 Foundation cpuid(cpuInfo, 0x00000007, 0); HW_AVX512F = (cpuInfo[1] & ((int)1 << 16)) != 0; } // OS support cpuid(cpuInfo, 1, 0); bool osUsesXSAVE_XRSTORE = (cpuInfo[2] & (1 << 27)) != 0; bool cpuAVXSuport = (cpuInfo[2] & (1 << 28)) != 
0; bool avx512Supported = false; if (osUsesXSAVE_XRSTORE && cpuAVXSuport) { uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK); avx512Supported = (xcrFeatureMask & 0xe6) == 0xe6; } return HW_AVX512F && avx512Supported; } #endif #include #include #include #include namespace hnswlib { typedef size_t labeltype; template class pairGreater { public: bool operator()(const T& p1, const T& p2) { return p1.first > p2.first; } }; template static void writeBinaryPOD(std::ostream &out, const T &podRef) { out.write((char *) &podRef, sizeof(T)); } template static void readBinaryPOD(std::istream &in, T &podRef) { in.read((char *) &podRef, sizeof(T)); } template using DISTFUNC = MTYPE(*)(const void *, const void *, const void *); template class SpaceInterface { public: //virtual void search(void *); virtual size_t get_data_size() = 0; virtual DISTFUNC get_dist_func() = 0; virtual void *get_dist_func_param() = 0; virtual ~SpaceInterface() {} }; template class AlgorithmInterface { public: virtual void addPoint(const void *datapoint, labeltype label)=0; virtual std::priority_queue> searchKnn(const void *, size_t) const = 0; // Return k nearest neighbor in the order of closer fist virtual std::vector> searchKnnCloserFirst(const void* query_data, size_t k) const; virtual void saveIndex(const std::string &location)=0; virtual ~AlgorithmInterface(){ } }; template std::vector> AlgorithmInterface::searchKnnCloserFirst(const void* query_data, size_t k) const { std::vector> result; // here searchKnn returns the result in the order of further first auto ret = searchKnn(query_data, k); { size_t sz = ret.size(); result.resize(sz); while (!ret.empty()) { result[--sz] = ret.top(); ret.pop(); } } return result; } } #include "space_l2.h" #include "space_ip.h" #include "bruteforce.h" #include "hnswalg.h" hnswlib-0.6.2/hnswlib/space_ip.h000066400000000000000000000272061420255137300166000ustar00rootroot00000000000000#pragma once #include "hnswlib.h" namespace hnswlib { static float 
InnerProduct(const void *pVect1, const void *pVect2, const void *qty_ptr) { size_t qty = *((size_t *) qty_ptr); float res = 0; for (unsigned i = 0; i < qty; i++) { res += ((float *) pVect1)[i] * ((float *) pVect2)[i]; } return res; } static float InnerProductDistance(const void *pVect1, const void *pVect2, const void *qty_ptr) { return 1.0f - InnerProduct(pVect1, pVect2, qty_ptr); } #if defined(USE_AVX) // Favor using AVX if available. static float InnerProductSIMD4ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { float PORTABLE_ALIGN32 TmpRes[8]; float *pVect1 = (float *) pVect1v; float *pVect2 = (float *) pVect2v; size_t qty = *((size_t *) qty_ptr); size_t qty16 = qty / 16; size_t qty4 = qty / 4; const float *pEnd1 = pVect1 + 16 * qty16; const float *pEnd2 = pVect1 + 4 * qty4; __m256 sum256 = _mm256_set1_ps(0); while (pVect1 < pEnd1) { //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0); __m256 v1 = _mm256_loadu_ps(pVect1); pVect1 += 8; __m256 v2 = _mm256_loadu_ps(pVect2); pVect2 += 8; sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2)); v1 = _mm256_loadu_ps(pVect1); pVect1 += 8; v2 = _mm256_loadu_ps(pVect2); pVect2 += 8; sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2)); } __m128 v1, v2; __m128 sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1)); while (pVect1 < pEnd2) { v1 = _mm_loadu_ps(pVect1); pVect1 += 4; v2 = _mm_loadu_ps(pVect2); pVect2 += 4; sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); } _mm_store_ps(TmpRes, sum_prod); float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];; return sum; } static float InnerProductDistanceSIMD4ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { return 1.0f - InnerProductSIMD4ExtAVX(pVect1v, pVect2v, qty_ptr); } #endif #if defined(USE_SSE) static float InnerProductSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { float PORTABLE_ALIGN32 TmpRes[8]; float *pVect1 = (float *) pVect1v; float *pVect2 = (float 
*) pVect2v; size_t qty = *((size_t *) qty_ptr); size_t qty16 = qty / 16; size_t qty4 = qty / 4; const float *pEnd1 = pVect1 + 16 * qty16; const float *pEnd2 = pVect1 + 4 * qty4; __m128 v1, v2; __m128 sum_prod = _mm_set1_ps(0); while (pVect1 < pEnd1) { v1 = _mm_loadu_ps(pVect1); pVect1 += 4; v2 = _mm_loadu_ps(pVect2); pVect2 += 4; sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); v1 = _mm_loadu_ps(pVect1); pVect1 += 4; v2 = _mm_loadu_ps(pVect2); pVect2 += 4; sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); v1 = _mm_loadu_ps(pVect1); pVect1 += 4; v2 = _mm_loadu_ps(pVect2); pVect2 += 4; sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); v1 = _mm_loadu_ps(pVect1); pVect1 += 4; v2 = _mm_loadu_ps(pVect2); pVect2 += 4; sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); } while (pVect1 < pEnd2) { v1 = _mm_loadu_ps(pVect1); pVect1 += 4; v2 = _mm_loadu_ps(pVect2); pVect2 += 4; sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); } _mm_store_ps(TmpRes, sum_prod); float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; return sum; } static float InnerProductDistanceSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { return 1.0f - InnerProductSIMD4ExtSSE(pVect1v, pVect2v, qty_ptr); } #endif #if defined(USE_AVX512) static float InnerProductSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { float PORTABLE_ALIGN64 TmpRes[16]; float *pVect1 = (float *) pVect1v; float *pVect2 = (float *) pVect2v; size_t qty = *((size_t *) qty_ptr); size_t qty16 = qty / 16; const float *pEnd1 = pVect1 + 16 * qty16; __m512 sum512 = _mm512_set1_ps(0); while (pVect1 < pEnd1) { //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0); __m512 v1 = _mm512_loadu_ps(pVect1); pVect1 += 16; __m512 v2 = _mm512_loadu_ps(pVect2); pVect2 += 16; sum512 = _mm512_add_ps(sum512, _mm512_mul_ps(v1, v2)); } _mm512_store_ps(TmpRes, sum512); float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7] + TmpRes[8] + 
TmpRes[9] + TmpRes[10] + TmpRes[11] + TmpRes[12] + TmpRes[13] + TmpRes[14] + TmpRes[15]; return sum; } static float InnerProductDistanceSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { return 1.0f - InnerProductSIMD16ExtAVX512(pVect1v, pVect2v, qty_ptr); } #endif #if defined(USE_AVX) static float InnerProductSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { float PORTABLE_ALIGN32 TmpRes[8]; float *pVect1 = (float *) pVect1v; float *pVect2 = (float *) pVect2v; size_t qty = *((size_t *) qty_ptr); size_t qty16 = qty / 16; const float *pEnd1 = pVect1 + 16 * qty16; __m256 sum256 = _mm256_set1_ps(0); while (pVect1 < pEnd1) { //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0); __m256 v1 = _mm256_loadu_ps(pVect1); pVect1 += 8; __m256 v2 = _mm256_loadu_ps(pVect2); pVect2 += 8; sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2)); v1 = _mm256_loadu_ps(pVect1); pVect1 += 8; v2 = _mm256_loadu_ps(pVect2); pVect2 += 8; sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2)); } _mm256_store_ps(TmpRes, sum256); float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7]; return sum; } static float InnerProductDistanceSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { return 1.0f - InnerProductSIMD16ExtAVX(pVect1v, pVect2v, qty_ptr); } #endif #if defined(USE_SSE) static float InnerProductSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { float PORTABLE_ALIGN32 TmpRes[8]; float *pVect1 = (float *) pVect1v; float *pVect2 = (float *) pVect2v; size_t qty = *((size_t *) qty_ptr); size_t qty16 = qty / 16; const float *pEnd1 = pVect1 + 16 * qty16; __m128 v1, v2; __m128 sum_prod = _mm_set1_ps(0); while (pVect1 < pEnd1) { v1 = _mm_loadu_ps(pVect1); pVect1 += 4; v2 = _mm_loadu_ps(pVect2); pVect2 += 4; sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); v1 = _mm_loadu_ps(pVect1); pVect1 += 4; v2 = _mm_loadu_ps(pVect2); pVect2 += 4; 
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); v1 = _mm_loadu_ps(pVect1); pVect1 += 4; v2 = _mm_loadu_ps(pVect2); pVect2 += 4; sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); v1 = _mm_loadu_ps(pVect1); pVect1 += 4; v2 = _mm_loadu_ps(pVect2); pVect2 += 4; sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2)); } _mm_store_ps(TmpRes, sum_prod); float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; return sum; } static float InnerProductDistanceSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { return 1.0f - InnerProductSIMD16ExtSSE(pVect1v, pVect2v, qty_ptr); } #endif #if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512) DISTFUNC InnerProductSIMD16Ext = InnerProductSIMD16ExtSSE; DISTFUNC InnerProductSIMD4Ext = InnerProductSIMD4ExtSSE; DISTFUNC InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtSSE; DISTFUNC InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtSSE; static float InnerProductDistanceSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { size_t qty = *((size_t *) qty_ptr); size_t qty16 = qty >> 4 << 4; float res = InnerProductSIMD16Ext(pVect1v, pVect2v, &qty16); float *pVect1 = (float *) pVect1v + qty16; float *pVect2 = (float *) pVect2v + qty16; size_t qty_left = qty - qty16; float res_tail = InnerProduct(pVect1, pVect2, &qty_left); return 1.0f - (res + res_tail); } static float InnerProductDistanceSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { size_t qty = *((size_t *) qty_ptr); size_t qty4 = qty >> 2 << 2; float res = InnerProductSIMD4Ext(pVect1v, pVect2v, &qty4); size_t qty_left = qty - qty4; float *pVect1 = (float *) pVect1v + qty4; float *pVect2 = (float *) pVect2v + qty4; float res_tail = InnerProduct(pVect1, pVect2, &qty_left); return 1.0f - (res + res_tail); } #endif class InnerProductSpace : public SpaceInterface { DISTFUNC fstdistfunc_; size_t data_size_; size_t dim_; public: InnerProductSpace(size_t dim) { 
// Scalar squared Euclidean distance between two float vectors.
//   pVect1v, pVect2v - pointers to the first element of each float vector
//   qty_ptr          - pointer to a size_t holding the number of elements
// Returns the sum of (a[i] - b[i])^2, accumulated in index order; 0 when the
// element count is 0. The inputs are only read.
static float
L2Sqr(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    const float *a = static_cast<const float *>(pVect1v);
    const float *b = static_cast<const float *>(pVect2v);
    const size_t qty = *static_cast<const size_t *>(qty_ptr);

    float res = 0;
    for (size_t i = 0; i < qty; i++) {
        const float t = a[i] - b[i];
        res += t * t;
    }
    return res;
}
static float L2SqrSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { float *pVect1 = (float *) pVect1v; float *pVect2 = (float *) pVect2v; size_t qty = *((size_t *) qty_ptr); float PORTABLE_ALIGN64 TmpRes[16]; size_t qty16 = qty >> 4; const float *pEnd1 = pVect1 + (qty16 << 4); __m512 diff, v1, v2; __m512 sum = _mm512_set1_ps(0); while (pVect1 < pEnd1) { v1 = _mm512_loadu_ps(pVect1); pVect1 += 16; v2 = _mm512_loadu_ps(pVect2); pVect2 += 16; diff = _mm512_sub_ps(v1, v2); // sum = _mm512_fmadd_ps(diff, diff, sum); sum = _mm512_add_ps(sum, _mm512_mul_ps(diff, diff)); } _mm512_store_ps(TmpRes, sum); float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7] + TmpRes[8] + TmpRes[9] + TmpRes[10] + TmpRes[11] + TmpRes[12] + TmpRes[13] + TmpRes[14] + TmpRes[15]; return (res); } #endif #if defined(USE_AVX) // Favor using AVX if available. static float L2SqrSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { float *pVect1 = (float *) pVect1v; float *pVect2 = (float *) pVect2v; size_t qty = *((size_t *) qty_ptr); float PORTABLE_ALIGN32 TmpRes[8]; size_t qty16 = qty >> 4; const float *pEnd1 = pVect1 + (qty16 << 4); __m256 diff, v1, v2; __m256 sum = _mm256_set1_ps(0); while (pVect1 < pEnd1) { v1 = _mm256_loadu_ps(pVect1); pVect1 += 8; v2 = _mm256_loadu_ps(pVect2); pVect2 += 8; diff = _mm256_sub_ps(v1, v2); sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff)); v1 = _mm256_loadu_ps(pVect1); pVect1 += 8; v2 = _mm256_loadu_ps(pVect2); pVect2 += 8; diff = _mm256_sub_ps(v1, v2); sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff)); } _mm256_store_ps(TmpRes, sum); return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7]; } #endif #if defined(USE_SSE) static float L2SqrSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { float *pVect1 = (float *) pVect1v; float *pVect2 = (float *) pVect2v; size_t qty = *((size_t *) 
qty_ptr); float PORTABLE_ALIGN32 TmpRes[8]; size_t qty16 = qty >> 4; const float *pEnd1 = pVect1 + (qty16 << 4); __m128 diff, v1, v2; __m128 sum = _mm_set1_ps(0); while (pVect1 < pEnd1) { //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0); v1 = _mm_loadu_ps(pVect1); pVect1 += 4; v2 = _mm_loadu_ps(pVect2); pVect2 += 4; diff = _mm_sub_ps(v1, v2); sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); v1 = _mm_loadu_ps(pVect1); pVect1 += 4; v2 = _mm_loadu_ps(pVect2); pVect2 += 4; diff = _mm_sub_ps(v1, v2); sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); v1 = _mm_loadu_ps(pVect1); pVect1 += 4; v2 = _mm_loadu_ps(pVect2); pVect2 += 4; diff = _mm_sub_ps(v1, v2); sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); v1 = _mm_loadu_ps(pVect1); pVect1 += 4; v2 = _mm_loadu_ps(pVect2); pVect2 += 4; diff = _mm_sub_ps(v1, v2); sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); } _mm_store_ps(TmpRes, sum); return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; } #endif #if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512) DISTFUNC L2SqrSIMD16Ext = L2SqrSIMD16ExtSSE; static float L2SqrSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { size_t qty = *((size_t *) qty_ptr); size_t qty16 = qty >> 4 << 4; float res = L2SqrSIMD16Ext(pVect1v, pVect2v, &qty16); float *pVect1 = (float *) pVect1v + qty16; float *pVect2 = (float *) pVect2v + qty16; size_t qty_left = qty - qty16; float res_tail = L2Sqr(pVect1, pVect2, &qty_left); return (res + res_tail); } #endif #if defined(USE_SSE) static float L2SqrSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { float PORTABLE_ALIGN32 TmpRes[8]; float *pVect1 = (float *) pVect1v; float *pVect2 = (float *) pVect2v; size_t qty = *((size_t *) qty_ptr); size_t qty4 = qty >> 2; const float *pEnd1 = pVect1 + (qty4 << 2); __m128 diff, v1, v2; __m128 sum = _mm_set1_ps(0); while (pVect1 < pEnd1) { v1 = _mm_loadu_ps(pVect1); pVect1 += 4; v2 = _mm_loadu_ps(pVect2); pVect2 += 4; diff = _mm_sub_ps(v1, v2); sum = 
// Squared L2 distance between two vectors of unsigned 8-bit components,
// unrolled four components at a time.
//   pVect1, pVect2 - pointers to the first byte of each vector
//   qty_ptr        - pointer to a size_t holding the number of components
// Returns the integer sum of squared component differences.
// Components left over when the count is not a multiple of 4 are handled by a
// scalar remainder loop instead of being silently ignored; for counts that are
// multiples of 4 the result is unchanged.
static int
L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {
    const size_t qty = *static_cast<const size_t *>(qty_ptr);
    const unsigned char *a = static_cast<const unsigned char *>(pVect1);
    const unsigned char *b = static_cast<const unsigned char *>(pVect2);

    int res = 0;
    size_t i = 0;
    for (; i + 4 <= qty; i += 4) {
        // unsigned char promotes to int, so the differences may be negative.
        const int d0 = a[i] - b[i];
        const int d1 = a[i + 1] - b[i + 1];
        const int d2 = a[i + 2] - b[i + 2];
        const int d3 = a[i + 3] - b[i + 3];
        res += d0 * d0;
        res += d1 * d1;
        res += d2 * d2;
        res += d3 * d3;
    }
    for (; i < qty; i++) {
        const int d = a[i] - b[i];
        res += d * d;
    }
    return res;
}
// Tag type stored per element in a VisitedList.
typedef unsigned short int vl_type;

// Array of per-element tags plus a "current" tag value (curV). reset() bumps
// curV instead of clearing the array; the array is zeroed only when curV
// wraps around to 0.
//
// The object owns `mass` and frees it in the destructor, so copying is
// disabled: an implicit copy would make two objects delete the same array.
class VisitedList {
public:
    vl_type curV;              // current tag value
    vl_type *mass;             // numelements tags, owned by this object
    unsigned int numelements;  // length of mass

    // Allocates numelements1 tags. The array is left uninitialised on
    // purpose: curV starts at the maximum vl_type value, so the first reset()
    // wraps it to 0 and zeroes the array.
    explicit VisitedList(int numelements1)
        : curV(static_cast<vl_type>(-1)),
          mass(nullptr),
          numelements(static_cast<unsigned int>(numelements1)) {
        mass = new vl_type[numelements];
    }

    VisitedList(const VisitedList &) = delete;
    VisitedList &operator=(const VisitedList &) = delete;

    // Advances curV by one. When curV wraps to 0 the whole array is zeroed
    // and curV is advanced once more, so after reset() curV is never 0.
    void reset() {
        curV++;
        if (curV == 0) {
            memset(mass, 0, sizeof(vl_type) * numelements);
            curV++;
        }
    }

    ~VisitedList() { delete[] mass; }
};
lock(poolguard); pool.push_front(vl); }; ~VisitedListPool() { while (pool.size()) { VisitedList *rez = pool.front(); pool.pop_front(); delete rez; } }; }; } hnswlib-0.6.2/main.cpp000066400000000000000000000001071420255137300146150ustar00rootroot00000000000000 void sift_test1B(); int main() { sift_test1B(); return 0; };hnswlib-0.6.2/pyproject.toml000066400000000000000000000002241420255137300161010ustar00rootroot00000000000000[build-system] requires = [ "setuptools>=42", "wheel", "numpy>=1.10.0", "pybind11>=2.0", ] build-backend = "setuptools.build_meta" hnswlib-0.6.2/python_bindings/000077500000000000000000000000001420255137300163655ustar00rootroot00000000000000hnswlib-0.6.2/python_bindings/LazyIndex.py000066400000000000000000000032771420255137300206570ustar00rootroot00000000000000import hnswlib """ A python wrapper for lazy indexing, preserves the same api as hnswlib.Index but initializes the index only after adding items for the first time with `add_items`. """ class LazyIndex(hnswlib.Index): def __init__(self, space, dim,max_elements=1024, ef_construction=200, M=16): super().__init__(space, dim) self.init_max_elements=max_elements self.init_ef_construction=ef_construction self.init_M=M def init_index(self, max_elements=0,M=0,ef_construction=0): if max_elements>0: self.init_max_elements=max_elements if ef_construction>0: self.init_ef_construction=ef_construction if M>0: self.init_M=M super().init_index(self.init_max_elements, self.init_M, self.init_ef_construction) def add_items(self, data, ids=None, num_threads=-1): if self.max_elements==0: self.init_index() return super().add_items(data,ids, num_threads) def get_items(self, ids=None): if self.max_elements==0: return [] return super().get_items(ids) def knn_query(self, data,k=1, num_threads=-1): if self.max_elements==0: return [], [] return super().knn_query(data, k, num_threads) def resize_index(self, size): if self.max_elements==0: return self.init_index(size) else: return super().resize_index(size) def 
/*
 * Replacement for the OpenMP '#pragma omp parallel for' directive.
 * Only handles a subset of the functionality (no reductions etc.).
 * Processes ids from start (inclusive) to end (EXCLUSIVE), calling
 * fn(id, threadId) once per id, with threadId < numThreads.
 *
 * numThreads == 0 means "use std::thread::hardware_concurrency()". That call
 * is allowed to return 0 when the value is not computable; in that case one
 * worker is used, so the loop body is never silently skipped.
 *
 * If fn throws, the remaining ids are abandoned, all workers are joined and
 * the last captured exception is rethrown on the calling thread.
 *
 * The method is borrowed from nmslib.
 */
template<class Function>
inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn) {
    if (numThreads == 0) {
        numThreads = std::thread::hardware_concurrency();
        if (numThreads == 0) {
            numThreads = 1;
        }
    }

    if (numThreads == 1) {
        for (size_t id = start; id < end; id++) {
            fn(id, 0);
        }
        return;
    }

    std::vector<std::thread> threads;
    threads.reserve(numThreads);
    std::atomic<size_t> current(start);

    // keep track of exceptions in threads
    // https://stackoverflow.com/a/32428427/1713196
    std::exception_ptr lastException = nullptr;
    std::mutex lastExceptMutex;

    for (size_t threadId = 0; threadId < numThreads; ++threadId) {
        threads.push_back(std::thread([&, threadId] {
            while (true) {
                const size_t id = current.fetch_add(1);
                if (id >= end) {
                    break;
                }

                try {
                    fn(id, threadId);
                } catch (...) {
                    std::unique_lock<std::mutex> lastExcepLock(lastExceptMutex);
                    lastException = std::current_exception();
                    /*
                     * This will work even when current is the largest value that
                     * size_t can fit, because fetch_add returns the previous value
                     * before the increment (what will result in overflow
                     * and produce 0 instead of current + 1).
                     */
                    current = end;
                    break;
                }
            }
        }));
    }
    for (auto &thread : threads) {
        thread.join();
    }
    if (lastException) {
        std::rethrow_exception(lastException);
    }
}
load_index for an already inited index. Old index is being deallocated."; delete appr_alg; } appr_alg = new hnswlib::HierarchicalNSW(l2space, path_to_index, false, max_elements); cur_l = appr_alg->cur_element_count; index_inited = true; } void normalize_vector(float *data, float *norm_array){ float norm=0.0f; for(int i=0;i items(input); auto buffer = items.request(); if (num_threads <= 0) num_threads = num_threads_default; size_t rows, features; if (buffer.ndim != 2 && buffer.ndim != 1) throw std::runtime_error("data must be a 1d/2d array"); if (buffer.ndim == 2) { rows = buffer.shape[0]; features = buffer.shape[1]; } else{ rows = 1; features = buffer.shape[0]; } if (features != dim) throw std::runtime_error("wrong dimensionality of the vectors"); // avoid using threads when the number of searches is small: if(rows<=num_threads*4){ num_threads=1; } std::vector ids; if (!ids_.is_none()) { py::array_t < size_t, py::array::c_style | py::array::forcecast > items(ids_); auto ids_numpy = items.request(); if(ids_numpy.ndim == 1 && ids_numpy.shape[0] == rows) { std::vector ids1(ids_numpy.shape[0]); for (size_t i = 0; i < ids1.size(); i++) { ids1[i] = items.data()[i]; } ids.swap(ids1); } else if(ids_numpy.ndim == 0 && rows == 1) { ids.push_back(*items.data()); } else throw std::runtime_error("wrong dimensionality of the labels"); } { int start = 0; if (!ep_added) { size_t id = ids.size() ? ids.at(0) : (cur_l); float *vector_data = (float *) items.data(0); std::vector norm_array(dim); if(normalize){ normalize_vector(vector_data, norm_array.data()); vector_data = norm_array.data(); } appr_alg->addPoint((void *) vector_data, (size_t) id); start = 1; ep_added = true; } py::gil_scoped_release l; if(normalize==false) { ParallelFor(start, rows, num_threads, [&](size_t row, size_t threadId) { size_t id = ids.size() ? 
ids.at(row) : (cur_l+row); appr_alg->addPoint((void *) items.data(row), (size_t) id); }); } else{ std::vector norm_array(num_threads * dim); ParallelFor(start, rows, num_threads, [&](size_t row, size_t threadId) { // normalize vector: size_t start_idx = threadId * dim; normalize_vector((float *) items.data(row), (norm_array.data()+start_idx)); size_t id = ids.size() ? ids.at(row) : (cur_l+row); appr_alg->addPoint((void *) (norm_array.data()+start_idx), (size_t) id); }); }; cur_l+=rows; } } std::vector> getDataReturnList(py::object ids_ = py::none()) { std::vector ids; if (!ids_.is_none()) { py::array_t < size_t, py::array::c_style | py::array::forcecast > items(ids_); auto ids_numpy = items.request(); if (ids_numpy.ndim == 0) { throw std::invalid_argument("get_items accepts a list of indices and returns a list of vectors"); } else { std::vector ids1(ids_numpy.shape[0]); for (size_t i = 0; i < ids1.size(); i++) { ids1[i] = items.data()[i]; } ids.swap(ids1); } } std::vector> data; for (auto id : ids) { data.push_back(appr_alg->template getDataByLabel(id)); } return data; } std::vector getIdsList() { std::vector ids; for(auto kv : appr_alg->label_lookup_) { ids.push_back(kv.first); } return ids; } py::dict getAnnData() const { /* WARNING: Index::getAnnData is not thread-safe with Index::addItems */ std::unique_lock templock(appr_alg->global); size_t level0_npy_size = appr_alg->cur_element_count * appr_alg->size_data_per_element_; size_t link_npy_size = 0; std::vector link_npy_offsets(appr_alg->cur_element_count); for (size_t i = 0; i < appr_alg->cur_element_count; i++){ size_t linkListSize = appr_alg->element_levels_[i] > 0 ? 
appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0; link_npy_offsets[i]=link_npy_size; if (linkListSize) link_npy_size += linkListSize; } char* data_level0_npy = (char *) malloc(level0_npy_size); char* link_list_npy = (char *) malloc(link_npy_size); int* element_levels_npy = (int *) malloc(appr_alg->element_levels_.size()*sizeof(int)); hnswlib::labeltype* label_lookup_key_npy = (hnswlib::labeltype *) malloc(appr_alg->label_lookup_.size()*sizeof(hnswlib::labeltype)); hnswlib::tableint* label_lookup_val_npy = (hnswlib::tableint *) malloc(appr_alg->label_lookup_.size()*sizeof(hnswlib::tableint)); memset(label_lookup_key_npy, -1, appr_alg->label_lookup_.size()*sizeof(hnswlib::labeltype)); memset(label_lookup_val_npy, -1, appr_alg->label_lookup_.size()*sizeof(hnswlib::tableint)); size_t idx=0; for ( auto it = appr_alg->label_lookup_.begin(); it != appr_alg->label_lookup_.end(); ++it ){ label_lookup_key_npy[idx]= it->first; label_lookup_val_npy[idx]= it->second; idx++; } memset(link_list_npy, 0, link_npy_size); memcpy(data_level0_npy, appr_alg->data_level0_memory_, level0_npy_size); memcpy(element_levels_npy, appr_alg->element_levels_.data(), appr_alg->element_levels_.size() * sizeof(int)); for (size_t i = 0; i < appr_alg->cur_element_count; i++){ size_t linkListSize = appr_alg->element_levels_[i] > 0 ? 
appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0; if (linkListSize){ memcpy(link_list_npy+link_npy_offsets[i], appr_alg->linkLists_[i], linkListSize); } } py::capsule free_when_done_l0(data_level0_npy, [](void *f) { delete[] f; }); py::capsule free_when_done_lvl(element_levels_npy, [](void *f) { delete[] f; }); py::capsule free_when_done_lb(label_lookup_key_npy, [](void *f) { delete[] f; }); py::capsule free_when_done_id(label_lookup_val_npy, [](void *f) { delete[] f; }); py::capsule free_when_done_ll(link_list_npy, [](void *f) { delete[] f; }); /* TODO: serialize state of random generators appr_alg->level_generator_ and appr_alg->update_probability_generator_ */ /* for full reproducibility / to avoid re-initializing generators inside Index::createFromParams */ return py::dict( "offset_level0"_a=appr_alg->offsetLevel0_, "max_elements"_a=appr_alg->max_elements_, "cur_element_count"_a=appr_alg->cur_element_count, "size_data_per_element"_a=appr_alg->size_data_per_element_, "label_offset"_a=appr_alg->label_offset_, "offset_data"_a=appr_alg->offsetData_, "max_level"_a=appr_alg->maxlevel_, "enterpoint_node"_a=appr_alg->enterpoint_node_, "max_M"_a=appr_alg->maxM_, "max_M0"_a=appr_alg->maxM0_, "M"_a=appr_alg->M_, "mult"_a=appr_alg->mult_, "ef_construction"_a=appr_alg->ef_construction_, "ef"_a=appr_alg->ef_, "has_deletions"_a=(bool)appr_alg->num_deleted_, "size_links_per_element"_a=appr_alg->size_links_per_element_, "label_lookup_external"_a=py::array_t( {appr_alg->label_lookup_.size()}, // shape {sizeof(hnswlib::labeltype)}, // C-style contiguous strides for double label_lookup_key_npy, // the data pointer free_when_done_lb), "label_lookup_internal"_a=py::array_t( {appr_alg->label_lookup_.size()}, // shape {sizeof(hnswlib::tableint)}, // C-style contiguous strides for double label_lookup_val_npy, // the data pointer free_when_done_id), "element_levels"_a=py::array_t( {appr_alg->element_levels_.size()}, // shape {sizeof(int)}, // C-style contiguous 
strides for double element_levels_npy, // the data pointer free_when_done_lvl), // linkLists_,element_levels_,data_level0_memory_ "data_level0"_a=py::array_t( {level0_npy_size}, // shape {sizeof(char)}, // C-style contiguous strides for double data_level0_npy, // the data pointer free_when_done_l0), "link_lists"_a=py::array_t( {link_npy_size}, // shape {sizeof(char)}, // C-style contiguous strides for double link_list_npy, // the data pointer free_when_done_ll) ); } py::dict getIndexParams() const { /* WARNING: Index::getAnnData is not thread-safe with Index::addItems */ auto params = py::dict( "ser_version"_a=py::int_(Index::ser_version), //serialization version "space"_a=space_name, "dim"_a=dim, "index_inited"_a=index_inited, "ep_added"_a=ep_added, "normalize"_a=normalize, "num_threads"_a=num_threads_default, "seed"_a=seed ); if(index_inited == false) return py::dict( **params, "ef"_a=default_ef); auto ann_params = getAnnData(); return py::dict(**params, **ann_params); } static Index * createFromParams(const py::dict d) { // check serialization version assert_true(((int)py::int_(Index::ser_version)) >= d["ser_version"].cast(), "Invalid serialization version!"); auto space_name_=d["space"].cast(); auto dim_=d["dim"].cast(); auto index_inited_=d["index_inited"].cast(); Index *new_index = new Index(space_name_, dim_); /* TODO: deserialize state of random generators into new_index->level_generator_ and new_index->update_probability_generator_ */ /* for full reproducibility / state of generators is serialized inside Index::getIndexParams */ new_index->seed = d["seed"].cast(); if (index_inited_){ new_index->appr_alg = new hnswlib::HierarchicalNSW(new_index->l2space, d["max_elements"].cast(), d["M"].cast(), d["ef_construction"].cast(), new_index->seed); new_index->cur_l = d["cur_element_count"].cast(); } new_index->index_inited = index_inited_; new_index->ep_added=d["ep_added"].cast(); new_index->num_threads_default=d["num_threads"].cast(); 
new_index->default_ef=d["ef"].cast(); if (index_inited_) new_index->setAnnData(d); return new_index; } static Index * createFromIndex(const Index & index) { return createFromParams(index.getIndexParams()); } void setAnnData(const py::dict d) { /* WARNING: Index::setAnnData is not thread-safe with Index::addItems */ std::unique_lock templock(appr_alg->global); assert_true(appr_alg->offsetLevel0_ == d["offset_level0"].cast(), "Invalid value of offsetLevel0_ "); assert_true(appr_alg->max_elements_ == d["max_elements"].cast(), "Invalid value of max_elements_ "); appr_alg->cur_element_count = d["cur_element_count"].cast(); assert_true(appr_alg->size_data_per_element_ == d["size_data_per_element"].cast(), "Invalid value of size_data_per_element_ "); assert_true(appr_alg->label_offset_ == d["label_offset"].cast(), "Invalid value of label_offset_ "); assert_true(appr_alg->offsetData_ == d["offset_data"].cast(), "Invalid value of offsetData_ "); appr_alg->maxlevel_ = d["max_level"].cast(); appr_alg->enterpoint_node_ = d["enterpoint_node"].cast(); assert_true(appr_alg->maxM_ == d["max_M"].cast(), "Invalid value of maxM_ "); assert_true(appr_alg->maxM0_ == d["max_M0"].cast(), "Invalid value of maxM0_ "); assert_true(appr_alg->M_ == d["M"].cast(), "Invalid value of M_ "); assert_true(appr_alg->mult_ == d["mult"].cast(), "Invalid value of mult_ "); assert_true(appr_alg->ef_construction_ == d["ef_construction"].cast(), "Invalid value of ef_construction_ "); appr_alg->ef_ = d["ef"].cast(); assert_true(appr_alg->size_links_per_element_ == d["size_links_per_element"].cast(), "Invalid value of size_links_per_element_ "); auto label_lookup_key_npy = d["label_lookup_external"].cast >(); auto label_lookup_val_npy = d["label_lookup_internal"].cast >(); auto element_levels_npy = d["element_levels"].cast >(); auto data_level0_npy = d["data_level0"].cast >(); auto link_list_npy = d["link_lists"].cast >(); for (size_t i = 0; i < appr_alg->cur_element_count; i++){ if 
(label_lookup_val_npy.data()[i] < 0){ throw std::runtime_error("internal id cannot be negative!"); } else{ appr_alg->label_lookup_.insert(std::make_pair(label_lookup_key_npy.data()[i], label_lookup_val_npy.data()[i])); } } memcpy(appr_alg->element_levels_.data(), element_levels_npy.data(), element_levels_npy.nbytes()); size_t link_npy_size = 0; std::vector link_npy_offsets(appr_alg->cur_element_count); for (size_t i = 0; i < appr_alg->cur_element_count; i++){ size_t linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0; link_npy_offsets[i]=link_npy_size; if (linkListSize) link_npy_size += linkListSize; } memcpy(appr_alg->data_level0_memory_, data_level0_npy.data(), data_level0_npy.nbytes()); for (size_t i = 0; i < appr_alg->max_elements_; i++) { size_t linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0; if (linkListSize == 0) { appr_alg->linkLists_[i] = nullptr; } else { appr_alg->linkLists_[i] = (char *) malloc(linkListSize); if (appr_alg->linkLists_[i] == nullptr) throw std::runtime_error("Not enough memory: loadIndex failed to allocate linklist"); memcpy(appr_alg->linkLists_[i], link_list_npy.data()+link_npy_offsets[i], linkListSize); } } // set num_deleted appr_alg->num_deleted_ = 0; bool has_deletions = d["has_deletions"].cast(); if (has_deletions) { for (size_t i = 0; i < appr_alg->cur_element_count; i++) { if(appr_alg->isMarkedDeleted(i)) appr_alg->num_deleted_ += 1; } } } py::object knnQuery_return_numpy(py::object input, size_t k = 1, int num_threads = -1) { py::array_t < dist_t, py::array::c_style | py::array::forcecast > items(input); auto buffer = items.request(); hnswlib::labeltype *data_numpy_l; dist_t *data_numpy_d; size_t rows, features; if (num_threads <= 0) num_threads = num_threads_default; { py::gil_scoped_release l; if (buffer.ndim != 2 && buffer.ndim != 1) throw std::runtime_error("data must be a 1d/2d array"); 
if (buffer.ndim == 2) { rows = buffer.shape[0]; features = buffer.shape[1]; } else{ rows = 1; features = buffer.shape[0]; } // avoid using threads when the number of searches is small: if(rows<=num_threads*4){ num_threads=1; } data_numpy_l = new hnswlib::labeltype[rows * k]; data_numpy_d = new dist_t[rows * k]; if(normalize==false) { ParallelFor(0, rows, num_threads, [&](size_t row, size_t threadId) { std::priority_queue> result = appr_alg->searchKnn( (void *) items.data(row), k); if (result.size() != k) throw std::runtime_error( "Cannot return the results in a contigious 2D array. Probably ef or M is too small"); for (int i = k - 1; i >= 0; i--) { auto &result_tuple = result.top(); data_numpy_d[row * k + i] = result_tuple.first; data_numpy_l[row * k + i] = result_tuple.second; result.pop(); } } ); } else{ std::vector norm_array(num_threads*features); ParallelFor(0, rows, num_threads, [&](size_t row, size_t threadId) { float *data= (float *) items.data(row); size_t start_idx = threadId * dim; normalize_vector((float *) items.data(row), (norm_array.data()+start_idx)); std::priority_queue> result = appr_alg->searchKnn( (void *) (norm_array.data()+start_idx), k); if (result.size() != k) throw std::runtime_error( "Cannot return the results in a contigious 2D array. 
Probably ef or M is too small"); for (int i = k - 1; i >= 0; i--) { auto &result_tuple = result.top(); data_numpy_d[row * k + i] = result_tuple.first; data_numpy_l[row * k + i] = result_tuple.second; result.pop(); } } ); } } py::capsule free_when_done_l(data_numpy_l, [](void *f) { delete[] f; }); py::capsule free_when_done_d(data_numpy_d, [](void *f) { delete[] f; }); return py::make_tuple( py::array_t( {rows, k}, // shape {k * sizeof(hnswlib::labeltype), sizeof(hnswlib::labeltype)}, // C-style contiguous strides for double data_numpy_l, // the data pointer free_when_done_l), py::array_t( {rows, k}, // shape {k * sizeof(dist_t), sizeof(dist_t)}, // C-style contiguous strides for double data_numpy_d, // the data pointer free_when_done_d)); } void markDeleted(size_t label) { appr_alg->markDelete(label); } void unmarkDeleted(size_t label) { appr_alg->unmarkDelete(label); } void resizeIndex(size_t new_size) { appr_alg->resizeIndex(new_size); } size_t getMaxElements() const { return appr_alg->max_elements_; } size_t getCurrentCount() const { return appr_alg->cur_element_count; } }; template class BFIndex { public: BFIndex(const std::string &space_name, const int dim) : space_name(space_name), dim(dim) { normalize=false; if(space_name=="l2") { space = new hnswlib::L2Space(dim); } else if(space_name=="ip") { space = new hnswlib::InnerProductSpace(dim); } else if(space_name=="cosine") { space = new hnswlib::InnerProductSpace(dim); normalize=true; } else { throw new std::runtime_error("Space name must be one of l2, ip, or cosine."); } alg = NULL; index_inited = false; } static const int ser_version = 1; // serialization version std::string space_name; int dim; bool index_inited; bool normalize; hnswlib::labeltype cur_l; hnswlib::BruteforceSearch *alg; hnswlib::SpaceInterface *space; ~BFIndex() { delete space; if (alg) delete alg; } void init_new_index(const size_t maxElements) { if (alg) { throw new std::runtime_error("The index is already initiated."); } cur_l = 0; alg = 
new hnswlib::BruteforceSearch(space, maxElements); index_inited = true; } void normalize_vector(float *data, float *norm_array){ float norm=0.0f; for(int i=0;i items(input); auto buffer = items.request(); size_t rows, features; if (buffer.ndim != 2 && buffer.ndim != 1) throw std::runtime_error("data must be a 1d/2d array"); if (buffer.ndim == 2) { rows = buffer.shape[0]; features = buffer.shape[1]; } else { rows = 1; features = buffer.shape[0]; } if (features != dim) throw std::runtime_error("wrong dimensionality of the vectors"); std::vector ids; if (!ids_.is_none()) { py::array_t < size_t, py::array::c_style | py::array::forcecast > items(ids_); auto ids_numpy = items.request(); if (ids_numpy.ndim == 1 && ids_numpy.shape[0] == rows) { std::vector ids1(ids_numpy.shape[0]); for (size_t i = 0; i < ids1.size(); i++) { ids1[i] = items.data()[i]; } ids.swap(ids1); } else if (ids_numpy.ndim == 0 && rows == 1) { ids.push_back(*items.data()); } else throw std::runtime_error("wrong dimensionality of the labels"); } { for (size_t row = 0; row < rows; row++) { size_t id = ids.size() ? ids.at(row) : cur_l + row; if (!normalize) { alg->addPoint((void *) items.data(row), (size_t) id); } else { std::vector normalized_vector(dim); normalize_vector((float *)items.data(row), normalized_vector.data()); alg->addPoint((void *) normalized_vector.data(), (size_t) id); } } cur_l+=rows; } } void deleteVector(size_t label) { alg->removePoint(label); } void saveIndex(const std::string &path_to_index) { alg->saveIndex(path_to_index); } void loadIndex(const std::string &path_to_index, size_t max_elements) { if (alg) { std::cerr<<"Warning: Calling load_index for an already inited index. 
Old index is being deallocated."; delete alg; } alg = new hnswlib::BruteforceSearch(space, path_to_index); cur_l = alg->cur_element_count; index_inited = true; } py::object knnQuery_return_numpy(py::object input, size_t k = 1) { py::array_t < dist_t, py::array::c_style | py::array::forcecast > items(input); auto buffer = items.request(); hnswlib::labeltype *data_numpy_l; dist_t *data_numpy_d; size_t rows, features; { py::gil_scoped_release l; if (buffer.ndim != 2 && buffer.ndim != 1) throw std::runtime_error("data must be a 1d/2d array"); if (buffer.ndim == 2) { rows = buffer.shape[0]; features = buffer.shape[1]; } else { rows = 1; features = buffer.shape[0]; } data_numpy_l = new hnswlib::labeltype[rows * k]; data_numpy_d = new dist_t[rows * k]; for (size_t row = 0; row < rows; row++) { std::priority_queue> result = alg->searchKnn( (void *) items.data(row), k); for (int i = k - 1; i >= 0; i--) { auto &result_tuple = result.top(); data_numpy_d[row * k + i] = result_tuple.first; data_numpy_l[row * k + i] = result_tuple.second; result.pop(); } } } py::capsule free_when_done_l(data_numpy_l, [](void *f) { delete[] f; }); py::capsule free_when_done_d(data_numpy_d, [](void *f) { delete[] f; }); return py::make_tuple( py::array_t( {rows, k}, // shape {k * sizeof(hnswlib::labeltype), sizeof(hnswlib::labeltype)}, // C-style contiguous strides for double data_numpy_l, // the data pointer free_when_done_l), py::array_t( {rows, k}, // shape {k * sizeof(dist_t), sizeof(dist_t)}, // C-style contiguous strides for double data_numpy_d, // the data pointer free_when_done_d)); } }; PYBIND11_PLUGIN(hnswlib) { py::module m("hnswlib"); py::class_>(m, "Index") .def(py::init(&Index::createFromParams), py::arg("params")) /* WARNING: Index::createFromIndex is not thread-safe with Index::addItems */ .def(py::init(&Index::createFromIndex), py::arg("index")) .def(py::init(), py::arg("space"), py::arg("dim")) .def("init_index", &Index::init_new_index, py::arg("max_elements"), py::arg("M")=16, 
py::arg("ef_construction")=200, py::arg("random_seed")=100) .def("knn_query", &Index::knnQuery_return_numpy, py::arg("data"), py::arg("k")=1, py::arg("num_threads")=-1) .def("add_items", &Index::addItems, py::arg("data"), py::arg("ids") = py::none(), py::arg("num_threads")=-1) .def("get_items", &Index::getDataReturnList, py::arg("ids") = py::none()) .def("get_ids_list", &Index::getIdsList) .def("set_ef", &Index::set_ef, py::arg("ef")) .def("set_num_threads", &Index::set_num_threads, py::arg("num_threads")) .def("save_index", &Index::saveIndex, py::arg("path_to_index")) .def("load_index", &Index::loadIndex, py::arg("path_to_index"), py::arg("max_elements")=0) .def("mark_deleted", &Index::markDeleted, py::arg("label")) .def("unmark_deleted", &Index::unmarkDeleted, py::arg("label")) .def("resize_index", &Index::resizeIndex, py::arg("new_size")) .def("get_max_elements", &Index::getMaxElements) .def("get_current_count", &Index::getCurrentCount) .def_readonly("space", &Index::space_name) .def_readonly("dim", &Index::dim) .def_readwrite("num_threads", &Index::num_threads_default) .def_property("ef", [](const Index & index) { return index.index_inited ? index.appr_alg->ef_ : index.default_ef; }, [](Index & index, const size_t ef_) { index.default_ef=ef_; if (index.appr_alg) index.appr_alg->ef_ = ef_; }) .def_property_readonly("max_elements", [](const Index & index) { return index.index_inited ? index.appr_alg->max_elements_ : 0; }) .def_property_readonly("element_count", [](const Index & index) { return index.index_inited ? index.appr_alg->cur_element_count : 0; }) .def_property_readonly("ef_construction", [](const Index & index) { return index.index_inited ? index.appr_alg->ef_construction_ : 0; }) .def_property_readonly("M", [](const Index & index) { return index.index_inited ? 
index.appr_alg->M_ : 0; }) .def(py::pickle( [](const Index &ind) { // __getstate__ return py::make_tuple(ind.getIndexParams()); /* Return dict (wrapped in a tuple) that fully encodes state of the Index object */ }, [](py::tuple t) { // __setstate__ if (t.size() != 1) throw std::runtime_error("Invalid state!"); return Index::createFromParams(t[0].cast()); } )) .def("__repr__", [](const Index &a) { return ""; }); py::class_>(m, "BFIndex") .def(py::init(), py::arg("space"), py::arg("dim")) .def("init_index", &BFIndex::init_new_index, py::arg("max_elements")) .def("knn_query", &BFIndex::knnQuery_return_numpy, py::arg("data"), py::arg("k")=1) .def("add_items", &BFIndex::addItems, py::arg("data"), py::arg("ids") = py::none()) .def("delete_vector", &BFIndex::deleteVector, py::arg("label")) .def("save_index", &BFIndex::saveIndex, py::arg("path_to_index")) .def("load_index", &BFIndex::loadIndex, py::arg("path_to_index"), py::arg("max_elements")=0) .def("__repr__", [](const BFIndex &a) { return ""; }); return m.ptr(); } hnswlib-0.6.2/python_bindings/setup.py000077700000000000000000000000001420255137300220172../setup.pyustar00rootroot00000000000000hnswlib-0.6.2/python_bindings/tests/000077500000000000000000000000001420255137300175275ustar00rootroot00000000000000hnswlib-0.6.2/python_bindings/tests/__init__.py000066400000000000000000000000001420255137300216260ustar00rootroot00000000000000hnswlib-0.6.2/python_bindings/tests/bindings_test.py000066400000000000000000000042771420255137300227470ustar00rootroot00000000000000import os import unittest import numpy as np import hnswlib class RandomSelfTestCase(unittest.TestCase): def testRandomSelf(self): dim = 16 num_elements = 10000 # Generating sample data data = np.float32(np.random.random((num_elements, dim))) # Declaring index p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip # Initiating index # max_elements - the maximum number of elements, should be known beforehand # (probably will be made optional 
in the future) # # ef_construction - controls index search speed/build speed tradeoff # M - is tightly connected with internal dimensionality of the data # strongly affects the memory consumption p.init_index(max_elements=num_elements, ef_construction=100, M=16) # Controlling the recall by setting ef: # higher ef leads to better accuracy, but slower search p.set_ef(10) p.set_num_threads(4) # by default using all available cores # We split the data in two batches: data1 = data[:num_elements // 2] data2 = data[num_elements // 2:] print("Adding first batch of %d elements" % (len(data1))) p.add_items(data1) # Query the elements for themselves and measure recall: labels, distances = p.knn_query(data1, k=1) self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))), 1.0, 3) # Serializing and deleting the index: index_path = 'first_half.bin' print("Saving index to '%s'" % index_path) p.save_index(index_path) del p # Re-initiating, loading the index p = hnswlib.Index(space='l2', dim=dim) # you can change the sa print("\nLoading index from '%s'\n" % index_path) p.load_index(index_path) print("Adding the second batch of %d elements" % (len(data2))) p.add_items(data2) # Query the elements for themselves and measure recall: labels, distances = p.knn_query(data, k=1) self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))), 1.0, 3) os.remove(index_path) hnswlib-0.6.2/python_bindings/tests/bindings_test_getdata.py000066400000000000000000000032641420255137300244330ustar00rootroot00000000000000import unittest import numpy as np import hnswlib class RandomSelfTestCase(unittest.TestCase): def testGettingItems(self): print("\n**** Getting the data by label test ****\n") dim = 16 num_elements = 10000 # Generating sample data data = np.float32(np.random.random((num_elements, dim))) labels = np.arange(0, num_elements) # Declaring index p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip # Initiating index # max_elements - the 
maximum number of elements, should be known beforehand # (probably will be made optional in the future) # # ef_construction - controls index search speed/build speed tradeoff # M - is tightly connected with internal dimensionality of the data # strongly affects the memory consumption p.init_index(max_elements=num_elements, ef_construction=100, M=16) # Controlling the recall by setting ef: # higher ef leads to better accuracy, but slower search p.set_ef(100) p.set_num_threads(4) # by default using all available cores # Before adding anything, getting any labels should fail self.assertRaises(Exception, lambda: p.get_items(labels)) print("Adding all elements (%d)" % (len(data))) p.add_items(data, labels) # Getting data by label should raise an exception if a scalar is passed: self.assertRaises(ValueError, lambda: p.get_items(labels[0])) # After adding them, all labels should be retrievable returned_items = p.get_items(labels) self.assertSequenceEqual(data.tolist(), returned_items) hnswlib-0.6.2/python_bindings/tests/bindings_test_labels.py000066400000000000000000000117131420255137300242620ustar00rootroot00000000000000import os import unittest import numpy as np import hnswlib class RandomSelfTestCase(unittest.TestCase): def testRandomSelf(self): for idx in range(2): print("\n**** Index save-load test ****\n") np.random.seed(idx) dim = 16 num_elements = 10000 # Generating sample data data = np.float32(np.random.random((num_elements, dim))) # Declaring index p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip # Initiating index # max_elements - the maximum number of elements, should be known beforehand # (probably will be made optional in the future) # # ef_construction - controls index search speed/build speed tradeoff # M - is tightly connected with internal dimensionality of the data # strongly affects the memory consumption p.init_index(max_elements=num_elements, ef_construction=100, M=16) # Controlling the recall by setting ef: # higher 
ef leads to better accuracy, but slower search p.set_ef(100) p.set_num_threads(4) # by default using all available cores # We split the data in two batches: data1 = data[:num_elements // 2] data2 = data[num_elements // 2:] print("Adding first batch of %d elements" % (len(data1))) p.add_items(data1) # Query the elements for themselves and measure recall: labels, distances = p.knn_query(data1, k=1) items = p.get_items(labels) # Check the recall: self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))), 1.0, 3) # Check that the returned element data is correct: diff_with_gt_labels=np.mean(np.abs(data1-items)) self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-4) # Serializing and deleting the index. # We need the part to check that serialization is working properly. index_path = 'first_half.bin' print("Saving index to '%s'" % index_path) p.save_index(index_path) print("Saved. Deleting...") del p print("Deleted") print("\n**** Mark delete test ****\n") # Re-initiating, loading the index print("Re-initiating") p = hnswlib.Index(space='l2', dim=dim) print("\nLoading index from '%s'\n" % index_path) p.load_index(index_path) p.set_ef(100) print("Adding the second batch of %d elements" % (len(data2))) p.add_items(data2) # Query the elements for themselves and measure recall: labels, distances = p.knn_query(data, k=1) items = p.get_items(labels) # Check the recall: self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))), 1.0, 3) # Check that the returned element data is correct: diff_with_gt_labels = np.mean(np.abs(data-items)) self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-4) # deleting index. 
# Checking that all labels are returned correctly: sorted_labels = sorted(p.get_ids_list()) self.assertEqual(np.sum(~np.asarray(sorted_labels) == np.asarray(range(num_elements))), 0) # Delete data1 labels1_deleted, _ = p.knn_query(data1, k=1) for l in labels1_deleted: p.mark_deleted(l[0]) labels2, _ = p.knn_query(data2, k=1) items = p.get_items(labels2) diff_with_gt_labels = np.mean(np.abs(data2-items)) self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-3) # console labels1_after, _ = p.knn_query(data1, k=1) for la in labels1_after: for lb in labels1_deleted: if la[0] == lb[0]: self.assertTrue(False) print("All the data in data1 are removed") # Checking saving/loading index with elements marked as deleted del_index_path = "with_deleted.bin" p.save_index(del_index_path) p = hnswlib.Index(space='l2', dim=dim) p.load_index(del_index_path) p.set_ef(100) labels1_after, _ = p.knn_query(data1, k=1) for la in labels1_after: for lb in labels1_deleted: if la[0] == lb[0]: self.assertTrue(False) # Unmark deleted data for l in labels1_deleted: p.unmark_deleted(l[0]) labels_restored, _ = p.knn_query(data1, k=1) self.assertAlmostEqual(np.mean(labels_restored.reshape(-1) == np.arange(len(data1))), 1.0, 3) print("All the data in data1 are restored") os.remove(index_path) os.remove(del_index_path) hnswlib-0.6.2/python_bindings/tests/bindings_test_metadata.py000066400000000000000000000030601420255137300245740ustar00rootroot00000000000000import unittest import numpy as np import hnswlib class RandomSelfTestCase(unittest.TestCase): def testMetadata(self): dim = 16 num_elements = 10000 # Generating sample data data = np.float32(np.random.random((num_elements, dim))) # Declaring index p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip # Initing index # max_elements - the maximum number of elements, should be known beforehand # (probably will be made optional in the future) # # ef_construction - controls index search speed/build speed tradeoff # M - is 
tightly connected with internal dimensionality of the data # stronlgy affects the memory consumption p.init_index(max_elements=num_elements, ef_construction=100, M=16) # Controlling the recall by setting ef: # higher ef leads to better accuracy, but slower search p.set_ef(100) p.set_num_threads(4) # by default using all available cores print("Adding all elements (%d)" % (len(data))) p.add_items(data) # test methods self.assertEqual(p.get_max_elements(), num_elements) self.assertEqual(p.get_current_count(), num_elements) # test properties self.assertEqual(p.space, 'l2') self.assertEqual(p.dim, dim) self.assertEqual(p.M, 16) self.assertEqual(p.ef_construction, 100) self.assertEqual(p.max_elements, num_elements) self.assertEqual(p.element_count, num_elements) hnswlib-0.6.2/python_bindings/tests/bindings_test_pickle.py000066400000000000000000000145361420255137300242750ustar00rootroot00000000000000import pickle import unittest import numpy as np import hnswlib def get_dist(metric, pt1, pt2): if metric == 'l2': return np.sum((pt1-pt2)**2) elif metric == 'ip': return 1. - np.sum(np.multiply(pt1, pt2)) elif metric == 'cosine': return 1. 
- np.sum(np.multiply(pt1, pt2)) / (np.sum(pt1**2) * np.sum(pt2**2))**.5 def brute_force_distances(metric, items, query_items, k): dists = np.zeros((query_items.shape[0], items.shape[0])) for ii in range(items.shape[0]): for jj in range(query_items.shape[0]): dists[jj,ii] = get_dist(metric, items[ii, :], query_items[jj, :]) labels = np.argsort(dists, axis=1) # equivalent, but faster: np.argpartition(dists, range(k), axis=1) dists = np.sort(dists, axis=1) # equivalent, but faster: np.partition(dists, range(k), axis=1) return labels[:, :k], dists[:, :k] def check_ann_results(self, metric, items, query_items, k, ann_l, ann_d, err_thresh=0, total_thresh=0, dists_thresh=0): brute_l, brute_d = brute_force_distances(metric, items, query_items, k) err_total = 0 for jj in range(query_items.shape[0]): err = np.sum(np.isin(brute_l[jj, :], ann_l[jj, :], invert=True)) if err > 0: print(f"Warning: {err} labels are missing from ann results (k={k}, err_thresh={err_thresh})") if err > err_thresh: err_total += 1 self.assertLessEqual(err_total, total_thresh, f"Error: knn_query returned incorrect labels for {err_total} items (k={k})") wrong_dists = np.sum(((brute_d - ann_d)**2.) 
> 1e-3) if wrong_dists > 0: dists_count = brute_d.shape[0]*brute_d.shape[1] print(f"Warning: {wrong_dists} ann distance values are different from brute-force values (total # of values={dists_count}, dists_thresh={dists_thresh})") self.assertLessEqual(wrong_dists, dists_thresh, msg=f"Error: {wrong_dists} ann distance values are different from brute-force values") def test_space_main(self, space, dim): # Generating sample data data = np.float32(np.random.random((self.num_elements, dim))) test_data = np.float32(np.random.random((self.num_test_elements, dim))) # Declaring index p = hnswlib.Index(space=space, dim=dim) # possible options are l2, cosine or ip print(f"Running pickle tests for {p}") p.num_threads = self.num_threads # by default using all available cores p0 = pickle.loads(pickle.dumps(p)) # pickle un-initialized Index p.init_index(max_elements=self.num_elements, ef_construction=self.ef_construction, M=self.M) p0.init_index(max_elements=self.num_elements, ef_construction=self.ef_construction, M=self.M) p.ef = self.ef p0.ef = self.ef p1 = pickle.loads(pickle.dumps(p)) # pickle Index before adding items # add items to ann index p,p0,p1 p.add_items(data) p1.add_items(data) p0.add_items(data) p2=pickle.loads(pickle.dumps(p)) # pickle Index before adding items self.assertTrue(np.allclose(p.get_items(), p0.get_items()), "items for p and p0 must be same") self.assertTrue(np.allclose(p0.get_items(), p1.get_items()), "items for p0 and p1 must be same") self.assertTrue(np.allclose(p1.get_items(), p2.get_items()), "items for p1 and p2 must be same") # Test if returned distances are same l, d = p.knn_query(test_data, k=self.k) l0, d0 = p0.knn_query(test_data, k=self.k) l1, d1 = p1.knn_query(test_data, k=self.k) l2, d2 = p2.knn_query(test_data, k=self.k) self.assertLessEqual(np.sum(((d-d0)**2.) > 1e-3), self.dists_err_thresh, msg=f"knn distances returned by p and p0 must match") self.assertLessEqual(np.sum(((d0-d1)**2.) 
> 1e-3), self.dists_err_thresh, msg=f"knn distances returned by p0 and p1 must match") self.assertLessEqual(np.sum(((d1-d2)**2.) > 1e-3), self.dists_err_thresh, msg=f"knn distances returned by p1 and p2 must match") # check if ann results match brute-force search # allow for 2 labels to be missing from ann results check_ann_results(self, space, data, test_data, self.k, l, d, err_thresh=self.label_err_thresh, total_thresh=self.item_err_thresh, dists_thresh=self.dists_err_thresh) check_ann_results(self, space, data, test_data, self.k, l2, d2, err_thresh=self.label_err_thresh, total_thresh=self.item_err_thresh, dists_thresh=self.dists_err_thresh) # Check ef parameter value self.assertEqual(p.ef, self.ef, "incorrect value of p.ef") self.assertEqual(p0.ef, self.ef, "incorrect value of p0.ef") self.assertEqual(p2.ef, self.ef, "incorrect value of p2.ef") self.assertEqual(p1.ef, self.ef, "incorrect value of p1.ef") # Check M parameter value self.assertEqual(p.M, self.M, "incorrect value of p.M") self.assertEqual(p0.M, self.M, "incorrect value of p0.M") self.assertEqual(p1.M, self.M, "incorrect value of p1.M") self.assertEqual(p2.M, self.M, "incorrect value of p2.M") # Check ef_construction parameter value self.assertEqual(p.ef_construction, self.ef_construction, "incorrect value of p.ef_construction") self.assertEqual(p0.ef_construction, self.ef_construction, "incorrect value of p0.ef_construction") self.assertEqual(p1.ef_construction, self.ef_construction, "incorrect value of p1.ef_construction") self.assertEqual(p2.ef_construction, self.ef_construction, "incorrect value of p2.ef_construction") class PickleUnitTests(unittest.TestCase): def setUp(self): self.ef_construction = 200 self.M = 32 self.ef = 400 self.num_elements = 1000 self.num_test_elements = 100 self.num_threads = 4 self.k = 25 self.label_err_thresh = 5 # max number of missing labels allowed per test item self.item_err_thresh = 5 # max number of items allowed with incorrect labels self.dists_err_thresh = 50 # 
for two matrices, d1 and d2, dists_err_thresh controls max # number of value pairs that are allowed to be different in d1 and d2 # i.e., number of values that are (d1-d2)**2>1e-3 def test_inner_product_space(self): test_space_main(self, 'ip', 16) def test_l2_space(self): test_space_main(self, 'l2', 53) def test_cosine_space(self): test_space_main(self, 'cosine', 32) hnswlib-0.6.2/python_bindings/tests/bindings_test_recall.py000066400000000000000000000051651420255137300242660ustar00rootroot00000000000000import hnswlib import numpy as np dim = 32 num_elements = 100000 k = 10 nun_queries = 10 # Generating sample data data = np.float32(np.random.random((num_elements, dim))) # Declaring index hnsw_index = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip bf_index = hnswlib.BFIndex(space='l2', dim=dim) # Initing both hnsw and brute force indices # max_elements - the maximum number of elements (capacity). Will throw an exception if exceeded # during insertion of an element. # The capacity can be increased by saving/loading the index, see below. # # hnsw construction params: # ef_construction - controls index search speed/build speed tradeoff # # M - is tightly connected with internal dimensionality of the data. 
Strongly affects the memory consumption (~M) # Higher M leads to higher accuracy/run_time at fixed ef/efConstruction hnsw_index.init_index(max_elements=num_elements, ef_construction=200, M=16) bf_index.init_index(max_elements=num_elements) # Controlling the recall for hnsw by setting ef: # higher ef leads to better accuracy, but slower search hnsw_index.set_ef(200) # Set number of threads used during batch search/construction in hnsw # By default using all available cores hnsw_index.set_num_threads(1) print("Adding batch of %d elements" % (len(data))) hnsw_index.add_items(data) bf_index.add_items(data) print("Indices built") # Generating query data query_data = np.float32(np.random.random((nun_queries, dim))) # Query the elements and measure recall: labels_hnsw, distances_hnsw = hnsw_index.knn_query(query_data, k) labels_bf, distances_bf = bf_index.knn_query(query_data, k) # Measure recall correct = 0 for i in range(nun_queries): for label in labels_hnsw[i]: for correct_label in labels_bf[i]: if label == correct_label: correct += 1 break print("recall is :", float(correct)/(k*nun_queries)) # test serializing the brute force index index_path = 'bf_index.bin' print("Saving index to '%s'" % index_path) bf_index.save_index(index_path) del bf_index # Re-initiating, loading the index bf_index = hnswlib.BFIndex(space='l2', dim=dim) print("\nLoading index from '%s'\n" % index_path) bf_index.load_index(index_path) # Query the brute force index again to verify that we get the same results labels_bf, distances_bf = bf_index.knn_query(query_data, k) # Measure recall correct = 0 for i in range(nun_queries): for label in labels_hnsw[i]: for correct_label in labels_bf[i]: if label == correct_label: correct += 1 break print("recall after reloading is :", float(correct)/(k*nun_queries)) hnswlib-0.6.2/python_bindings/tests/bindings_test_resize.py000066400000000000000000000055521420255137300243250ustar00rootroot00000000000000import unittest import numpy as np import hnswlib class 
RandomSelfTestCase(unittest.TestCase): def testRandomSelf(self): for idx in range(16): print("\n**** Index resize test ****\n") np.random.seed(idx) dim = 16 num_elements = 10000 # Generating sample data data = np.float32(np.random.random((num_elements, dim))) # Declaring index p = hnswlib.Index(space='l2', dim=dim) # possible options are l2, cosine or ip # Initiating index # max_elements - the maximum number of elements, should be known beforehand # (probably will be made optional in the future) # # ef_construction - controls index search speed/build speed tradeoff # M - is tightly connected with internal dimensionality of the data # strongly affects the memory consumption p.init_index(max_elements=num_elements//2, ef_construction=100, M=16) # Controlling the recall by setting ef: # higher ef leads to better accuracy, but slower search p.set_ef(20) p.set_num_threads(idx % 8) # by default using all available cores # We split the data in two batches: data1 = data[:num_elements // 2] data2 = data[num_elements // 2:] print("Adding first batch of %d elements" % (len(data1))) p.add_items(data1) # Query the elements for themselves and measure recall: labels, distances = p.knn_query(data1, k=1) items = p.get_items(list(range(len(data1)))) # Check the recall: self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))), 1.0, 3) # Check that the returned element data is correct: diff_with_gt_labels = np.max(np.abs(data1-items)) self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-4) print("Resizing the index") p.resize_index(num_elements) print("Adding the second batch of %d elements" % (len(data2))) p.add_items(data2) # Query the elements for themselves and measure recall: labels, distances = p.knn_query(data, k=1) items=p.get_items(list(range(num_elements))) # Check the recall: self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))), 1.0, 3) # Check that the returned element data is correct: diff_with_gt_labels = 
np.max(np.abs(data-items)) self.assertAlmostEqual(diff_with_gt_labels, 0, delta=1e-4) # Checking that all labels are returned correctly: sorted_labels = sorted(p.get_ids_list()) self.assertEqual(np.sum(~np.asarray(sorted_labels) == np.asarray(range(num_elements))), 0) hnswlib-0.6.2/python_bindings/tests/bindings_test_spaces.py000066400000000000000000000025731420255137300243020ustar00rootroot00000000000000import unittest import numpy as np import hnswlib class RandomSelfTestCase(unittest.TestCase): def testRandomSelf(self): data1 = np.asarray([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 1], [1, 1, 1], ]) for space, expected_distances in [ ('l2', [[0., 1., 2., 2., 2.]]), ('ip', [[-2., -1., 0., 0., 0.]]), ('cosine', [[0, 1.835e-01, 4.23e-01, 4.23e-01, 4.23e-01]])]: for rightdim in range(1, 128, 3): for leftdim in range(1, 32, 5): data2 = np.concatenate( [np.zeros([data1.shape[0], leftdim]), data1, np.zeros([data1.shape[0], rightdim])], axis=1) dim = data2.shape[1] p = hnswlib.Index(space=space, dim=dim) p.init_index(max_elements=5, ef_construction=100, M=16) p.set_ef(10) p.add_items(data2) # Query the elements for themselves and measure recall: labels, distances = p.knn_query(np.asarray(data2[-1:]), k=5) diff=np.mean(np.abs(distances-expected_distances)) self.assertAlmostEqual(diff, 0, delta=1e-3) hnswlib-0.6.2/setup.py000066400000000000000000000070351420255137300147060ustar00rootroot00000000000000import os import sys import platform import numpy as np import pybind11 import setuptools from setuptools import Extension, setup from setuptools.command.build_ext import build_ext __version__ = '0.6.1' include_dirs = [ pybind11.get_include(), np.get_include(), ] # compatibility when run in python_bindings bindings_dir = 'python_bindings' if bindings_dir in os.path.basename(os.getcwd()): source_files = ['./bindings.cpp'] include_dirs.extend(['../hnswlib/']) else: source_files = ['./python_bindings/bindings.cpp'] include_dirs.extend(['./hnswlib/']) libraries = [] extra_objects = 
[] ext_modules = [ Extension( 'hnswlib', source_files, include_dirs=include_dirs, libraries=libraries, language='c++', extra_objects=extra_objects, ), ] # As of Python 3.6, CCompiler has a `has_flag` method. # cf http://bugs.python.org/issue26689 def has_flag(compiler, flagname): """Return a boolean indicating whether a flag name is supported on the specified compiler. """ import tempfile with tempfile.NamedTemporaryFile('w', suffix='.cpp') as f: f.write('int main (int argc, char **argv) { return 0; }') try: compiler.compile([f.name], extra_postargs=[flagname]) except setuptools.distutils.errors.CompileError: return False return True def cpp_flag(compiler): """Return the -std=c++[11/14] compiler flag. The c++14 is prefered over c++11 (when it is available). """ if has_flag(compiler, '-std=c++14'): return '-std=c++14' elif has_flag(compiler, '-std=c++11'): return '-std=c++11' else: raise RuntimeError('Unsupported compiler -- at least C++11 support ' 'is needed!') class BuildExt(build_ext): """A custom build extension for adding compiler-specific options.""" c_opts = { 'msvc': ['/EHsc', '/openmp', '/O2'], #'unix': ['-O3', '-march=native'], # , '-w' 'unix': ['-O3'], # , '-w' } if not os.environ.get("HNSWLIB_NO_NATIVE"): c_opts['unix'].append('-march=native') link_opts = { 'unix': [], 'msvc': [], } if sys.platform == 'darwin': if platform.machine() == 'arm64': c_opts['unix'].remove('-march=native') c_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7'] link_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7'] else: c_opts['unix'].append("-fopenmp") link_opts['unix'].extend(['-fopenmp', '-pthread']) def build_extensions(self): ct = self.compiler.compiler_type opts = self.c_opts.get(ct, []) if ct == 'unix': opts.append('-DVERSION_INFO="%s"' % self.distribution.get_version()) opts.append(cpp_flag(self.compiler)) if has_flag(self.compiler, '-fvisibility=hidden'): opts.append('-fvisibility=hidden') elif ct == 'msvc': 
opts.append('/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version()) for ext in self.extensions: ext.extra_compile_args.extend(opts) ext.extra_link_args.extend(self.link_opts.get(ct, [])) build_ext.build_extensions(self) setup( name='hnswlib', version=__version__, description='hnswlib', author='Yury Malkov and others', url='https://github.com/yurymalkov/hnsw', long_description="""hnsw""", ext_modules=ext_modules, install_requires=['numpy'], cmdclass={'build_ext': BuildExt}, zip_safe=False, ) hnswlib-0.6.2/sift_1b.cpp000066400000000000000000000255261420255137300152340ustar00rootroot00000000000000#include #include #include #include #include "hnswlib/hnswlib.h" #include using namespace std; using namespace hnswlib; class StopW { std::chrono::steady_clock::time_point time_begin; public: StopW() { time_begin = std::chrono::steady_clock::now(); } float getElapsedTimeMicro() { std::chrono::steady_clock::time_point time_end = std::chrono::steady_clock::now(); return (std::chrono::duration_cast(time_end - time_begin).count()); } void reset() { time_begin = std::chrono::steady_clock::now(); } }; /* * Author: David Robert Nadeau * Site: http://NadeauSoftware.com/ * License: Creative Commons Attribution 3.0 Unported License * http://creativecommons.org/licenses/by/3.0/deed.en_US */ #if defined(_WIN32) #include #include #elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) #include #include #if defined(__APPLE__) && defined(__MACH__) #include #elif (defined(_AIX) || defined(__TOS__AIX__)) || (defined(__sun__) || defined(__sun) || defined(sun) && (defined(__SVR4) || defined(__svr4__))) #include #include #elif defined(__linux__) || defined(__linux) || defined(linux) || defined(__gnu_linux__) #endif #else #error "Cannot define getPeakRSS( ) or getCurrentRSS( ) for an unknown OS." 
#endif /** * Returns the peak (maximum so far) resident set size (physical * memory use) measured in bytes, or zero if the value cannot be * determined on this OS. */ static size_t getPeakRSS() { #if defined(_WIN32) /* Windows -------------------------------------------------- */ PROCESS_MEMORY_COUNTERS info; GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); return (size_t)info.PeakWorkingSetSize; #elif (defined(_AIX) || defined(__TOS__AIX__)) || (defined(__sun__) || defined(__sun) || defined(sun) && (defined(__SVR4) || defined(__svr4__))) /* AIX and Solaris ------------------------------------------ */ struct psinfo psinfo; int fd = -1; if ((fd = open("/proc/self/psinfo", O_RDONLY)) == -1) return (size_t)0L; /* Can't open? */ if (read(fd, &psinfo, sizeof(psinfo)) != sizeof(psinfo)) { close(fd); return (size_t)0L; /* Can't read? */ } close(fd); return (size_t)(psinfo.pr_rssize * 1024L); #elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) /* BSD, Linux, and OSX -------------------------------------- */ struct rusage rusage; getrusage(RUSAGE_SELF, &rusage); #if defined(__APPLE__) && defined(__MACH__) return (size_t)rusage.ru_maxrss; #else return (size_t) (rusage.ru_maxrss * 1024L); #endif #else /* Unknown OS ----------------------------------------------- */ return (size_t)0L; /* Unsupported. */ #endif } /** * Returns the current resident set size (physical memory use) measured * in bytes, or zero if the value cannot be determined on this OS. 
*/ static size_t getCurrentRSS() { #if defined(_WIN32) /* Windows -------------------------------------------------- */ PROCESS_MEMORY_COUNTERS info; GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); return (size_t)info.WorkingSetSize; #elif defined(__APPLE__) && defined(__MACH__) /* OSX ------------------------------------------------------ */ struct mach_task_basic_info info; mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) != KERN_SUCCESS) return (size_t)0L; /* Can't access? */ return (size_t)info.resident_size; #elif defined(__linux__) || defined(__linux) || defined(linux) || defined(__gnu_linux__) /* Linux ---------------------------------------------------- */ long rss = 0L; FILE *fp = NULL; if ((fp = fopen("/proc/self/statm", "r")) == NULL) return (size_t) 0L; /* Can't open? */ if (fscanf(fp, "%*s%ld", &rss) != 1) { fclose(fp); return (size_t) 0L; /* Can't read? */ } fclose(fp); return (size_t) rss * (size_t) sysconf(_SC_PAGESIZE); #else /* AIX, BSD, Solaris, and Unknown OS ------------------------ */ return (size_t)0L; /* Unsupported. 
*/ #endif } static void get_gt(unsigned int *massQA, unsigned char *massQ, unsigned char *mass, size_t vecsize, size_t qsize, L2SpaceI &l2space, size_t vecdim, vector>> &answers, size_t k) { (vector>>(qsize)).swap(answers); DISTFUNC fstdistfunc_ = l2space.get_dist_func(); cout << qsize << "\n"; for (int i = 0; i < qsize; i++) { for (int j = 0; j < k; j++) { answers[i].emplace(0.0f, massQA[1000 * i + j]); } } } static float test_approx(unsigned char *massQ, size_t vecsize, size_t qsize, HierarchicalNSW &appr_alg, size_t vecdim, vector>> &answers, size_t k) { size_t correct = 0; size_t total = 0; //uncomment to test in parallel mode: //#pragma omp parallel for for (int i = 0; i < qsize; i++) { std::priority_queue> result = appr_alg.searchKnn(massQ + vecdim * i, k); std::priority_queue> gt(answers[i]); unordered_set g; total += gt.size(); while (gt.size()) { g.insert(gt.top().second); gt.pop(); } while (result.size()) { if (g.find(result.top().second) != g.end()) { correct++; } else { } result.pop(); } } return 1.0f * correct / total; } static void test_vs_recall(unsigned char *massQ, size_t vecsize, size_t qsize, HierarchicalNSW &appr_alg, size_t vecdim, vector>> &answers, size_t k) { vector efs;// = { 10,10,10,10,10 }; for (int i = k; i < 30; i++) { efs.push_back(i); } for (int i = 30; i < 100; i += 10) { efs.push_back(i); } for (int i = 100; i < 500; i += 40) { efs.push_back(i); } for (size_t ef : efs) { appr_alg.setEf(ef); StopW stopw = StopW(); float recall = test_approx(massQ, vecsize, qsize, appr_alg, vecdim, answers, k); float time_us_per_query = stopw.getElapsedTimeMicro() / qsize; cout << ef << "\t" << recall << "\t" << time_us_per_query << " us\n"; if (recall > 1.0) { cout << recall << "\t" << time_us_per_query << " us\n"; break; } } } inline bool exists_test(const std::string &name) { ifstream f(name.c_str()); return f.good(); } void sift_test1B() { int subset_size_milllions = 200; int efConstruction = 40; int M = 16; size_t vecsize = subset_size_milllions 
* 1000000; size_t qsize = 10000; size_t vecdim = 128; char path_index[1024]; char path_gt[1024]; char *path_q = "../bigann/bigann_query.bvecs"; char *path_data = "../bigann/bigann_base.bvecs"; sprintf(path_index, "sift1b_%dm_ef_%d_M_%d.bin", subset_size_milllions, efConstruction, M); sprintf(path_gt, "../bigann/gnd/idx_%dM.ivecs", subset_size_milllions); unsigned char *massb = new unsigned char[vecdim]; cout << "Loading GT:\n"; ifstream inputGT(path_gt, ios::binary); unsigned int *massQA = new unsigned int[qsize * 1000]; for (int i = 0; i < qsize; i++) { int t; inputGT.read((char *) &t, 4); inputGT.read((char *) (massQA + 1000 * i), t * 4); if (t != 1000) { cout << "err"; return; } } inputGT.close(); cout << "Loading queries:\n"; unsigned char *massQ = new unsigned char[qsize * vecdim]; ifstream inputQ(path_q, ios::binary); for (int i = 0; i < qsize; i++) { int in = 0; inputQ.read((char *) &in, 4); if (in != 128) { cout << "file error"; exit(1); } inputQ.read((char *) massb, in); for (int j = 0; j < vecdim; j++) { massQ[i * vecdim + j] = massb[j]; } } inputQ.close(); unsigned char *mass = new unsigned char[vecdim]; ifstream input(path_data, ios::binary); int in = 0; L2SpaceI l2space(vecdim); HierarchicalNSW *appr_alg; if (exists_test(path_index)) { cout << "Loading index from " << path_index << ":\n"; appr_alg = new HierarchicalNSW(&l2space, path_index, false); cout << "Actual memory usage: " << getCurrentRSS() / 1000000 << " Mb \n"; } else { cout << "Building index:\n"; appr_alg = new HierarchicalNSW(&l2space, vecsize, M, efConstruction); input.read((char *) &in, 4); if (in != 128) { cout << "file error"; exit(1); } input.read((char *) massb, in); for (int j = 0; j < vecdim; j++) { mass[j] = massb[j] * (1.0f); } appr_alg->addPoint((void *) (massb), (size_t) 0); int j1 = 0; StopW stopw = StopW(); StopW stopw_full = StopW(); size_t report_every = 100000; #pragma omp parallel for for (int i = 1; i < vecsize; i++) { unsigned char mass[128]; int j2=0; #pragma omp 
critical { input.read((char *) &in, 4); if (in != 128) { cout << "file error"; exit(1); } input.read((char *) massb, in); for (int j = 0; j < vecdim; j++) { mass[j] = massb[j]; } j1++; j2=j1; if (j1 % report_every == 0) { cout << j1 / (0.01 * vecsize) << " %, " << report_every / (1000.0 * 1e-6 * stopw.getElapsedTimeMicro()) << " kips " << " Mem: " << getCurrentRSS() / 1000000 << " Mb \n"; stopw.reset(); } } appr_alg->addPoint((void *) (mass), (size_t) j2); } input.close(); cout << "Build time:" << 1e-6 * stopw_full.getElapsedTimeMicro() << " seconds\n"; appr_alg->saveIndex(path_index); } vector>> answers; size_t k = 1; cout << "Parsing gt:\n"; get_gt(massQA, massQ, mass, vecsize, qsize, l2space, vecdim, answers, k); cout << "Loaded gt\n"; for (int i = 0; i < 1; i++) test_vs_recall(massQ, vecsize, qsize, *appr_alg, vecdim, answers, k); cout << "Actual memory usage: " << getCurrentRSS() / 1000000 << " Mb \n"; return; } hnswlib-0.6.2/sift_test.cpp000066400000000000000000000253401420255137300157030ustar00rootroot00000000000000#include #include #include #include #include "hnswlib/hnswlib.h" #include using namespace std; using namespace hnswlib; /* template void writeBinaryPOD(ostream& out, const T& podRef) { out.write((char*)&podRef, sizeof(T)); } template static void readBinaryPOD(istream& in, T& podRef) { in.read((char*)&podRef, sizeof(T)); }*/ class StopW { std::chrono::steady_clock::time_point time_begin; public: StopW() { time_begin = std::chrono::steady_clock::now(); } float getElapsedTimeMicro() { std::chrono::steady_clock::time_point time_end = std::chrono::steady_clock::now(); return (std::chrono::duration_cast(time_end - time_begin).count()); } void reset() { time_begin = std::chrono::steady_clock::now(); } }; void get_gt(float *mass, float *massQ, size_t vecsize, size_t qsize, L2Space &l2space, size_t vecdim, vector>> &answers, size_t k) { BruteforceSearch bs(&l2space, vecsize); for (int i = 0; i < vecsize; i++) { bs.addPoint((void *) (mass + vecdim * i), 
(size_t) i); } (vector>>(qsize)).swap(answers); //answers.swap(vector>>(qsize)); for (int i = 0; i < qsize; i++) { std::priority_queue> gt = bs.searchKnn(massQ + vecdim * i, 10); answers[i] = gt; } } void get_gt(unsigned int *massQA, float *massQ, float *mass, size_t vecsize, size_t qsize, L2Space &l2space, size_t vecdim, vector>> &answers, size_t k) { //answers.swap(vector>>(qsize)); (vector>>(qsize)).swap(answers); DISTFUNC fstdistfunc_ = l2space.get_dist_func(); cout << qsize << "\n"; for (int i = 0; i < qsize; i++) { for (int j = 0; j < k; j++) { float other = fstdistfunc_(massQ + i * vecdim, mass + massQA[100 * i + j] * vecdim, l2space.get_dist_func_param()); answers[i].emplace(other, massQA[100 * i + j]); } } } float test_approx(float *massQ, size_t vecsize, size_t qsize, HierarchicalNSW &appr_alg, size_t vecdim, vector>> &answers, size_t k) { size_t correct = 0; size_t total = 0; //#pragma omp parallel for for (int i = 0; i < qsize; i++) { std::priority_queue> result = appr_alg.searchKnn(massQ + vecdim * i, 10); std::priority_queue> gt(answers[i]); unordered_set g; total += gt.size(); while (gt.size()) { g.insert(gt.top().second); gt.pop(); } while (result.size()) { if (g.find(result.top().second) != g.end()) correct++; result.pop(); } } return 1.0f * correct / total; } void test_vs_recall(float *massQ, size_t vecsize, size_t qsize, HierarchicalNSW &appr_alg, size_t vecdim, vector>> &answers, size_t k) { //vector efs = { 1,2,3,4,6,8,12,16,24,32,64,128,256,320 };// = ; { 23 }; vector efs; for (int i = 10; i < 30; i++) { efs.push_back(i); } for (int i = 100; i < 2000; i += 100) { efs.push_back(i); } /*for (int i = 300; i <600; i += 20) { efs.push_back(i); }*/ for (size_t ef : efs) { appr_alg.setEf(ef); StopW stopw = StopW(); float recall = test_approx(massQ, vecsize, qsize, appr_alg, vecdim, answers, k); float time_us_per_query = stopw.getElapsedTimeMicro() / qsize; cout << ef << "\t" << recall << "\t" << time_us_per_query << " us\n"; if (recall > 1.0) { cout 
<< recall << "\t" << time_us_per_query << " us\n"; break; } } } //void get_knn_quality(unsigned int *massA,size_t vecsize, size_t maxn, HierarchicalNSW &appr_alg) { // size_t total = 0; // size_t correct = 0; // for (int i = 0; i < vecsize; i++) { // int *data = (int *)(appr_alg.linkList0_ + i * appr_alg.size_links_per_element0_); // //cout << "numconn:" << *data<<"\n"; // tableint *datal = (tableint *)(data + 1); // total += maxn; // for (int j = 0; j < *data; j++) { // labeltype conn = appr_alg.getExternalLabel(datal[j]); // for (int k = 1; k <= maxn; k++) { // if (massA[i * 100 + k] == conn) { // correct++; // break; // } // } // } // if (i % 1000 == 0) { // cout << i << "\t" << correct << "\t" << total << "\n"; // correct = 0; // total = 0; // } // } //} //#include "windows.h" void sift_test() { size_t vecsize = 980000; size_t qsize = 20000; //size_t qsize = 1000; //size_t vecdim = 4; size_t vecdim = 128; float *mass = new float[vecsize * vecdim]; ifstream input("../../sift100k.bin", ios::binary); //ifstream input("../../1M_d=4.bin", ios::binary); input.read((char *) mass, vecsize * vecdim * sizeof(float)); input.close(); float *massQ = new float[qsize * vecdim]; //ifstream inputQ("../siftQ100k.bin", ios::binary); ifstream inputQ("../../siftQ100k.bin", ios::binary); //ifstream inputQ("../../1M_d=4q.bin", ios::binary); inputQ.read((char *) massQ, qsize * vecdim * sizeof(float)); inputQ.close(); unsigned int *massQA = new unsigned int[qsize * 100]; //ifstream inputQA("../knnQA100k.bin", ios::binary); ifstream inputQA("../../knnQA100k.bin", ios::binary); //ifstream inputQA("../../1M_d=4qa.bin", ios::binary); inputQA.read((char *) massQA, qsize * 100 * sizeof(int)); inputQA.close(); int maxn = 16; /*unsigned int *massA = new unsigned int[vecsize * 100]; ifstream inputA("..\\..\\knngraph100k.bin", ios::binary); inputA.read((char *)massA, vecsize * 100 * sizeof(int)); inputA.close();*/ L2Space l2space(vecdim); //BruteforceSearch bs(&l2space, vecsize); //for(int 
tr=1;tr<9;tr++) //#define LOAD_I #ifdef LOAD_I HierarchicalNSW appr_alg(&l2space, "hnswlib_sift",false); //HierarchicalNSW appr_alg(&l2space, "D:/stuff/hnsw_lib/nmslib/similarity_search/release/temp",true); //HierarchicalNSW appr_alg(&l2space, "/mnt/d/stuff/hnsw_lib/nmslib/similarity_search/release/temp", true); //appr_alg_saved.saveIndex("d:\\hnsw-index.bin"); //appr_alg_saved.loadIndex("d:\\hnsw-index2.bin", &l2space); #else //return; //for (int u = 0; u < 10; u++) { /* PROCESS_MEMORY_COUNTERS pmc; GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc)); SIZE_T virtualMemUsedByMe = pmc.WorkingSetSize; cout << virtualMemUsedByMe/1000/1000 << "\n";*/ //HierarchicalNSW appr_alg(&l2space, vecsize, 6, 40); HierarchicalNSW appr_alg(&l2space, vecsize, 16, 200); cout << "Building index\n"; StopW stopwb = StopW(); for (int i = 0; i < 1; i++) { appr_alg.addPoint((void *) (mass + vecdim * i), (size_t) i); } #pragma omp parallel for for (int i = 1; i < vecsize; i++) { appr_alg.addPoint((void *) (mass + vecdim * i), (size_t) i); } /*GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc)); virtualMemUsedByMe = pmc.WorkingSetSize; cout << virtualMemUsedByMe / 1000 / 1000 << "\n";*/ cout << "Index built, time=" << stopwb.getElapsedTimeMicro() * 1e-6 << "\n"; //appr_alg.saveIndex("hnswlib_sift"); //appr_alg.saveIndex("d:\\hnsw-index2.bin"); #endif //get_knn_quality(massA, vecsize, maxn, appr_alg); //return; vector>> answers; size_t k = 10; cout << "Loading gt\n"; //get_gt(mass, massQ, vecsize, qsize, l2space, vecdim, answers,k); get_gt(massQA, massQ, mass, vecsize, qsize, l2space, vecdim, answers, k); cout << "Loaded gt\n"; for (int i = 0; i < 1; i++) test_vs_recall(massQ, vecsize, qsize, appr_alg, vecdim, answers, k); //cout << "opt:\n"; //appr_alg.opt = true; return; //test_approx(mass, massQ, vecsize, qsize, appr_alg, vecdim, answers); // //return; // // cout << appr_alg.maxlevel_ << "\n"; // //CHECK: // //for (size_t io = 0; io < vecsize; io++) { // // if 
(appr_alg.getExternalLabel(io) != io) // // throw new exception("bad!"); // //} // DISTFUNC fstdistfunc_ = l2space.get_dist_func(); ////#pragma omp parallel for // for (int i = 0; i < vecsize; i++) { // int *data = (int *)(appr_alg.linkList0_ + i * appr_alg.size_links_per_element0_); // //cout << "numconn:" << *data<<"\n"; // tableint *datal = (tableint *)(data + 1); // // std::priority_queue< std::pair< float, tableint >> rez; // unordered_set g; // for (int j = 0; j < *data; j++) { // g.insert(datal[j]); // } // appr_alg.setEf(400); // std::priority_queue< std::pair< float, tableint >> closest_elements = appr_alg.searchKnnInternal(appr_alg.getDataByInternalId(i), 17); // while (closest_elements.size() > 0) { // if (closest_elements.top().second != i) { // g.insert(closest_elements.top().second); // } // closest_elements.pop(); // } // // for (tableint l : g) { // float other = fstdistfunc_(appr_alg.getDataByInternalId(l), appr_alg.getDataByInternalId(i), l2space.get_dist_func_param()); // rez.emplace(other, l); // } // while (rez.size() > 32) // rez.pop(); // int len = rez.size(); // *data = len; // // check there are no loop connections created // for (int j = 0; j < len; j++) { // datal[j] = rez.top().second; // if (datal[j] == i) // throw new exception(); // rez.pop(); // } // // } // // //get_knn_quality(massA, vecsize, maxn, appr_alg); // test_vs_recall( massQ, vecsize, qsize, appr_alg, vecdim, answers, k); // /*test_vs_recall( massQ, vecsize, qsize, appr_alg, vecdim, answers, k); // test_vs_recall( massQ, vecsize, qsize, appr_alg, vecdim, answers, k); // test_vs_recall( massQ, vecsize, qsize, appr_alg, vecdim, answers, k);*/ // // // // // // /*for(int i=0;i<1000;i++) // cout << mass[i] << "\n";*/ // //("11", std::ios::binary); }