diff --git a/README.md b/README.md index 567d3be8..503d75c2 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,12 @@ Header-only C++ HNSW implementation with python bindings. **NEWS:** +**version 0.6.2** + +* Fixed a bug in saving of large pickles. Pickles larger than 4GB could have been corrupted. Thanks Kai Wohlfahrt for reporting. +* Thanks to ([@GuyAv46](https://github.com/GuyAv46)) hnswlib inner product is now more consistent across architectures (SSE, AVX, etc). +* + **version 0.6.1** * Thanks to ([@tony-kuo](https://github.com/tony-kuo)) hnswlib AVX512 and AVX builds are not backwards-compatible with older SSE and non-AVX512 architectures. @@ -235,6 +241,9 @@ or you can install via pip: ### For developers +Contributions are highly welcome! + +Please make pull requests against the `develop` branch. When making changes please run tests (and please add a test to `python_bindings/tests` in case there is new functionality): ```bash @@ -259,10 +268,6 @@ https://github.com/dbaranchuk/ivf-hnsw * .Net implementation: https://github.com/microsoft/HNSW.Net * CUDA implementation: https://github.com/js1010/cuhnsw -### Contributing to the repository -Contributions are highly welcome! - -Please make pull requests against the `develop` branch. ### 200M SIFT test reproduction To download and extract the bigann dataset (from root directory): diff --git a/hnswlib/space_ip.h b/hnswlib/space_ip.h index 7cd3d020..b4266f78 100644 --- a/hnswlib/space_ip.h +++ b/hnswlib/space_ip.h @@ -10,10 +10,15 @@ namespace hnswlib { for (unsigned i = 0; i < qty; i++) { res += ((float *) pVect1)[i] * ((float *) pVect2)[i]; } - return (1.0f - res); + return res; } + static float + InnerProductDistance(const void *pVect1, const void *pVect2, const void *qty_ptr) { + return 1.0f - InnerProduct(pVect1, pVect2, qty_ptr); + } + #if defined(USE_AVX) // Favor using AVX if available. 
@@ -61,8 +66,13 @@ namespace hnswlib { _mm_store_ps(TmpRes, sum_prod); float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];; - return 1.0f - sum; -} + return sum; + } + + static float + InnerProductDistanceSIMD4ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { + return 1.0f - InnerProductSIMD4ExtAVX(pVect1v, pVect2v, qty_ptr); + } #endif @@ -121,7 +131,12 @@ namespace hnswlib { _mm_store_ps(TmpRes, sum_prod); float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; - return 1.0f - sum; + return sum; + } + + static float + InnerProductDistanceSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { + return 1.0f - InnerProductSIMD4ExtSSE(pVect1v, pVect2v, qty_ptr); } #endif @@ -156,7 +171,12 @@ namespace hnswlib { _mm512_store_ps(TmpRes, sum512); float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7] + TmpRes[8] + TmpRes[9] + TmpRes[10] + TmpRes[11] + TmpRes[12] + TmpRes[13] + TmpRes[14] + TmpRes[15]; - return 1.0f - sum; + return sum; + } + + static float + InnerProductDistanceSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { + return 1.0f - InnerProductSIMD16ExtAVX512(pVect1v, pVect2v, qty_ptr); } #endif @@ -196,15 +216,20 @@ namespace hnswlib { _mm256_store_ps(TmpRes, sum256); float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7]; - return 1.0f - sum; + return sum; + } + + static float + InnerProductDistanceSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { + return 1.0f - InnerProductSIMD16ExtAVX(pVect1v, pVect2v, qty_ptr); } #endif #if defined(USE_SSE) - static float - InnerProductSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { + static float + InnerProductSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { float PORTABLE_ALIGN32 TmpRes[8]; float *pVect1 = (float *) pVect1v; float *pVect2 = (float 
*) pVect2v; @@ -245,7 +270,12 @@ namespace hnswlib { _mm_store_ps(TmpRes, sum_prod); float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; - return 1.0f - sum; + return sum; + } + + static float + InnerProductDistanceSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { + return 1.0f - InnerProductSIMD16ExtSSE(pVect1v, pVect2v, qty_ptr); } #endif @@ -253,9 +283,11 @@ namespace hnswlib { #if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512) DISTFUNC InnerProductSIMD16Ext = InnerProductSIMD16ExtSSE; DISTFUNC InnerProductSIMD4Ext = InnerProductSIMD4ExtSSE; + DISTFUNC InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtSSE; + DISTFUNC InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtSSE; static float - InnerProductSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { + InnerProductDistanceSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { size_t qty = *((size_t *) qty_ptr); size_t qty16 = qty >> 4 << 4; float res = InnerProductSIMD16Ext(pVect1v, pVect2v, &qty16); @@ -264,11 +296,11 @@ namespace hnswlib { size_t qty_left = qty - qty16; float res_tail = InnerProduct(pVect1, pVect2, &qty_left); - return res + res_tail - 1.0f; + return 1.0f - (res + res_tail); } static float - InnerProductSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { + InnerProductDistanceSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) { size_t qty = *((size_t *) qty_ptr); size_t qty4 = qty >> 2 << 2; @@ -279,7 +311,7 @@ namespace hnswlib { float *pVect2 = (float *) pVect2v + qty4; float res_tail = InnerProduct(pVect1, pVect2, &qty_left); - return res + res_tail - 1.0f; + return 1.0f - (res + res_tail); } #endif @@ -290,30 +322,37 @@ namespace hnswlib { size_t dim_; public: InnerProductSpace(size_t dim) { - fstdistfunc_ = InnerProduct; + fstdistfunc_ = InnerProductDistance; #if defined(USE_AVX) || defined(USE_SSE) || 
defined(USE_AVX512) #if defined(USE_AVX512) - if (AVX512Capable()) + if (AVX512Capable()) { InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX512; - else if (AVXCapable()) + InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX512; + } else if (AVXCapable()) { InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX; + InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX; + } #elif defined(USE_AVX) - if (AVXCapable()) + if (AVXCapable()) { InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX; + InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX; + } #endif #if defined(USE_AVX) - if (AVXCapable()) + if (AVXCapable()) { InnerProductSIMD4Ext = InnerProductSIMD4ExtAVX; + InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtAVX; + } #endif if (dim % 16 == 0) - fstdistfunc_ = InnerProductSIMD16Ext; + fstdistfunc_ = InnerProductDistanceSIMD16Ext; else if (dim % 4 == 0) - fstdistfunc_ = InnerProductSIMD4Ext; + fstdistfunc_ = InnerProductDistanceSIMD4Ext; else if (dim > 16) - fstdistfunc_ = InnerProductSIMD16ExtResiduals; + fstdistfunc_ = InnerProductDistanceSIMD16ExtResiduals; else if (dim > 4) - fstdistfunc_ = InnerProductSIMD4ExtResiduals; + fstdistfunc_ = InnerProductDistanceSIMD4ExtResiduals; #endif dim_ = dim; data_size_ = dim * sizeof(float); @@ -334,5 +373,4 @@ namespace hnswlib { ~InnerProductSpace() {} }; - } diff --git a/python_bindings/bindings.cpp b/python_bindings/bindings.cpp index 4bf91c17..12f38e2e 100644 --- a/python_bindings/bindings.cpp +++ b/python_bindings/bindings.cpp @@ -292,12 +292,12 @@ class Index { py::dict getAnnData() const { /* WARNING: Index::getAnnData is not thread-safe with Index::addItems */ std::unique_lock templock(appr_alg->global); - unsigned int level0_npy_size = appr_alg->cur_element_count * appr_alg->size_data_per_element_; - unsigned int link_npy_size = 0; - std::vector link_npy_offsets(appr_alg->cur_element_count); + size_t level0_npy_size = appr_alg->cur_element_count * 
appr_alg->size_data_per_element_; + size_t link_npy_size = 0; + std::vector link_npy_offsets(appr_alg->cur_element_count); for (size_t i = 0; i < appr_alg->cur_element_count; i++){ - unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0; + size_t linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0; link_npy_offsets[i]=link_npy_size; if (linkListSize) link_npy_size += linkListSize; @@ -326,7 +326,7 @@ class Index { memcpy(element_levels_npy, appr_alg->element_levels_.data(), appr_alg->element_levels_.size() * sizeof(int)); for (size_t i = 0; i < appr_alg->cur_element_count; i++){ - unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0; + size_t linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0; if (linkListSize){ memcpy(link_list_npy+link_npy_offsets[i], appr_alg->linkLists_[i], linkListSize); } @@ -500,11 +500,11 @@ class Index { memcpy(appr_alg->element_levels_.data(), element_levels_npy.data(), element_levels_npy.nbytes()); - unsigned int link_npy_size = 0; - std::vector link_npy_offsets(appr_alg->cur_element_count); + size_t link_npy_size = 0; + std::vector link_npy_offsets(appr_alg->cur_element_count); for (size_t i = 0; i < appr_alg->cur_element_count; i++){ - unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0; + size_t linkListSize = appr_alg->element_levels_[i] > 0 ? 
appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0; link_npy_offsets[i]=link_npy_size; if (linkListSize) link_npy_size += linkListSize; @@ -513,7 +513,7 @@ class Index { memcpy(appr_alg->data_level0_memory_, data_level0_npy.data(), data_level0_npy.nbytes()); for (size_t i = 0; i < appr_alg->max_elements_; i++) { - unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0; + size_t linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0; if (linkListSize == 0) { appr_alg->linkLists_[i] = nullptr; } else { diff --git a/setup.py b/setup.py index e01ce76e..161886fd 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ import os import sys +import platform import numpy as np import pybind11 @@ -86,6 +87,8 @@ class BuildExt(build_ext): } if sys.platform == 'darwin': + if platform.machine() == 'arm64': + c_opts['unix'].remove('-march=native') c_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7'] link_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7'] else: