From 51450b029e6cded4f4266e71571f75ff108914dc Mon Sep 17 00:00:00 2001
From: Axlgrep
Date: Thu, 11 Jul 2024 20:26:06 +0800
Subject: [PATCH 1/2] support DataLevel0BlocksMemory data struct to solve the realloc huge memory doubling problem

---
 hnswlib/hnswalg.h | 322 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 280 insertions(+), 42 deletions(-)

diff --git a/hnswlib/hnswalg.h b/hnswlib/hnswalg.h
index f516df59..a2df13ee 100644
--- a/hnswlib/hnswalg.h
+++ b/hnswlib/hnswalg.h
@@ -14,6 +14,157 @@ namespace hnswlib {
 typedef unsigned int tableint;
 typedef unsigned int linklistsizeint;
 
+
+// Level-0 element storage split into independently malloc'ed blocks.
+//
+// Every block except possibly the last holds ElementCountPerBlock() records
+// of size_data_per_element bytes; the last block may be shorter. Growing the
+// store reallocs only that short last block and appends new blocks, which is
+// how it avoids the whole-buffer realloc doubling named in the commit subject.
+// The class performs no locking of its own.
+class DataLevel0BlocksMemory {
+ public:
+    // size_data_per_element: byte size of one record. Throws
+    // std::invalid_argument when it is zero: the former assert disappears under
+    // NDEBUG and would leave a division by zero below.
+    explicit DataLevel0BlocksMemory(size_t size_data_per_element)
+        : size_data_per_element_(size_data_per_element) {
+        if (size_data_per_element_ == 0) {
+            throw std::invalid_argument("size_data_per_element must be positive");
+        }
+        // Ceiling division: the smallest record count whose total size reaches
+        // MIN_MEMORY_BLOCK_SIZE; an oversized record gets a block of its own.
+        if (size_data_per_element_ >= MIN_MEMORY_BLOCK_SIZE) {
+            element_count_per_block_ = 1;
+        } else {
+            element_count_per_block_ = (MIN_MEMORY_BLOCK_SIZE + (size_data_per_element_ - 1)) / size_data_per_element_;
+        }
+    }
+
+    // Frees every block.
+    ~DataLevel0BlocksMemory() {
+        for (char *block : memory_blocks_) {
+            free(block);
+        }
+    }
+
+    // Number of records currently allocated.
+    size_t Capacity() const {
+        return capacity_;
+    }
+
+    // Number of records in a full block.
+    size_t ElementCountPerBlock() const {
+        return element_count_per_block_;
+    }
+
+    // First allocation of max_elements records; the store must still be empty.
+    // Throws std::runtime_error when malloc fails; Capacity() then covers only
+    // the blocks that were obtained.
+    void Malloc(size_t max_elements) {
+        assert(memory_blocks_.empty());
+        AppendStandardBlocks(max_elements / element_count_per_block_);
+        size_t last_block_elements = max_elements % element_count_per_block_;
+        if (last_block_elements != 0) {
+            AppendNonstandardBlock(last_block_elements);
+        }
+        assert(capacity_ == max_elements);
+    }
+
+    // Grows the store to max_elements records; a value not above Capacity() is
+    // a no-op. The short last block goes through realloc, so pointers into it
+    // may be invalidated. Throws std::runtime_error when allocation fails;
+    // capacity_ is updated after every successful step, so Capacity() still
+    // matches the blocks actually held.
+    void Realloc(size_t max_elements) {
+        if (max_elements <= capacity_) {
+            return;
+        }
+
+        size_t full_block_count = max_elements / element_count_per_block_;
+        if (capacity_ % element_count_per_block_ != 0) {
+            // The last block is short and has to grow first.
+            if (full_block_count < memory_blocks_.size()) {
+                // The new capacity still ends inside that block.
+                ReallocLastBlocks(max_elements % element_count_per_block_);
+                return;
+            }
+            ReallocLastBlocks(element_count_per_block_);
+        }
+
+        assert(full_block_count >= memory_blocks_.size());
+        AppendStandardBlocks(full_block_count - memory_blocks_.size());
+
+        size_t last_block_elements = max_elements % element_count_per_block_;
+        if (last_block_elements != 0) {
+            AppendNonstandardBlock(last_block_elements);
+        }
+        assert(capacity_ == max_elements);
+    }
+
+    // Address of the record with the given id; requires internal_id < Capacity().
+    char* GetElementPtr(tableint internal_id) const {
+        size_t index = internal_id / element_count_per_block_;
+        size_t elements_offset_in_block = internal_id % element_count_per_block_;
+        return memory_blocks_[index] + elements_offset_in_block * size_data_per_element_;
+    }
+
+    // Start of block number index; no bounds check is performed.
+    char* GetMemoryBlockPtr(size_t index) const {
+        return memory_blocks_[index];
+    }
+
+ private:
+    DataLevel0BlocksMemory(const DataLevel0BlocksMemory&) = delete;
+    DataLevel0BlocksMemory& operator=(const DataLevel0BlocksMemory&) = delete;
+
+    // Allocates one block of elements records and accounts for it.
+    void AppendBlock(size_t elements) {
+        // Take the vector slot first so a failing push_back cannot leak a block.
+        memory_blocks_.push_back(nullptr);
+        char* ptr = (char *) malloc(elements * size_data_per_element_);
+        if (ptr == nullptr) {
+            memory_blocks_.pop_back();
+            throw std::runtime_error("Not enough memory");
+        }
+        memory_blocks_.back() = ptr;
+        capacity_ += elements;
+    }
+
+    // Appends count full blocks.
+    void AppendStandardBlocks(size_t count) {
+        for (size_t i = 0; i < count; i++) {
+            AppendBlock(element_count_per_block_);
+        }
+    }
+
+    // Appends the short trailing block of elements records.
+    void AppendNonstandardBlock(size_t elements) {
+        AppendBlock(elements);
+    }
+
+    // Resizes the last block to elements records.
+    void ReallocLastBlocks(size_t elements) {
+        assert(!memory_blocks_.empty());
+        assert(capacity_ % element_count_per_block_ < elements && elements <= element_count_per_block_);
+        char* ptr = (char *) realloc(memory_blocks_.back(), elements * size_data_per_element_);
+        if (ptr == nullptr) {
+            // A failed realloc leaves the old block valid, so nothing changed.
+            throw std::runtime_error("Not enough memory");
+        }
+        memory_blocks_.back() = ptr;
+        capacity_ = (memory_blocks_.size() - 1) * element_count_per_block_ + elements;
+    }
+
+    size_t capacity_{0};                 // records currently allocated
+    size_t size_data_per_element_{0};    // bytes per record
+    size_t element_count_per_block_{0};  // records per full block
+    std::vector<char *> memory_blocks_;  // owned blocks, in record-id order
+
+    // Lower bound, in bytes, for the size of a full block (128 MiB).
+    static const size_t MIN_MEMORY_BLOCK_SIZE = 128 * 1024 * 1024;
+};
+
 template
 class HierarchicalNSW : public AlgorithmInterface {
  public:
@@ -47,7 +198,10 @@ class HierarchicalNSW : public AlgorithmInterface {
     size_t size_links_level0_{0};
     size_t offsetData_{0}, offsetLevel0_{0}, label_offset_{ 0 };
 
+    const bool use_blocks_memory_{false};  // same default as the constructors' use_small_blocks_memory
     char *data_level0_memory_{nullptr};
+    DataLevel0BlocksMemory *data_level0_blocks_memory_{nullptr};
+
     char **linkLists_{nullptr};
     std::vector element_levels_;  // keeps level of each element
 
@@ -80,8 +234,10 @@ class HierarchicalNSW : public AlgorithmInterface {
         const std::string &location,
         bool nmslib = false,
         size_t max_elements = 0,
-        bool allow_replace_deleted = false)
-        : allow_replace_deleted_(allow_replace_deleted) {
+        bool allow_replace_deleted = false,
+        bool use_small_blocks_memory = false)
+        : allow_replace_deleted_(allow_replace_deleted),
+            use_blocks_memory_(use_small_blocks_memory) {
         loadIndex(location, s, max_elements);
     }
 
@@ -92,11 +248,13 @@ class HierarchicalNSW : public AlgorithmInterface {
         size_t M = 16,
         size_t ef_construction = 200,
         size_t random_seed = 100,
-        bool allow_replace_deleted = false)
+        bool allow_replace_deleted = false,
+        bool use_small_blocks_memory = false)
         : label_op_locks_(MAX_LABEL_OPERATION_LOCKS),
             link_list_locks_(max_elements),
             element_levels_(max_elements),
-            allow_replace_deleted_(allow_replace_deleted) {
+            allow_replace_deleted_(allow_replace_deleted),
+            use_blocks_memory_(use_small_blocks_memory) {
         max_elements_ = max_elements;
         num_deleted_ = 0;
         data_size_ = s->get_data_size();
@@ -123,9 +281,15 @@ class HierarchicalNSW : public AlgorithmInterface {
label_offset_ = size_links_level0_ + data_size_; offsetLevel0_ = 0; - data_level0_memory_ = (char *) malloc(max_elements_ * size_data_per_element_); - if (data_level0_memory_ == nullptr) - throw std::runtime_error("Not enough memory"); + if (use_blocks_memory_) { + data_level0_blocks_memory_ = new DataLevel0BlocksMemory(size_data_per_element_); + data_level0_blocks_memory_->Malloc(max_elements_); + } else { + data_level0_memory_ = (char *) malloc(max_elements_ * size_data_per_element_); + if (data_level0_memory_ == nullptr) + throw std::runtime_error("Not enough memory"); + } + cur_element_count = 0; @@ -149,8 +294,12 @@ class HierarchicalNSW : public AlgorithmInterface { } void clear() { - free(data_level0_memory_); - data_level0_memory_ = nullptr; + if (use_blocks_memory_) { + delete data_level0_blocks_memory_; + } else { + free(data_level0_memory_); + data_level0_memory_ = nullptr; + } for (tableint i = 0; i < cur_element_count; i++) { if (element_levels_[i] > 0) free(linkLists_[i]); @@ -184,23 +333,39 @@ class HierarchicalNSW : public AlgorithmInterface { inline labeltype getExternalLabel(tableint internal_id) const { labeltype return_label; - memcpy(&return_label, (data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_), sizeof(labeltype)); + if (use_blocks_memory_) { + memcpy(&return_label, (data_level0_blocks_memory_->GetElementPtr(internal_id) + label_offset_), sizeof(labeltype)); + } else { + memcpy(&return_label, (data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_), sizeof(labeltype)); + } return return_label; } inline void setExternalLabel(tableint internal_id, labeltype label) const { - memcpy((data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_), &label, sizeof(labeltype)); + if (use_blocks_memory_) { + memcpy((data_level0_blocks_memory_->GetElementPtr(internal_id) + label_offset_), &label, sizeof(labeltype)); + } else { + memcpy((data_level0_memory_ + internal_id * 
size_data_per_element_ + label_offset_), &label, sizeof(labeltype)); + } } inline labeltype *getExternalLabeLp(tableint internal_id) const { - return (labeltype *) (data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_); + if (use_blocks_memory_) { + return (labeltype *) (data_level0_blocks_memory_->GetElementPtr(internal_id) + label_offset_); + } else { + return (labeltype *) (data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_); + } } inline char *getDataByInternalId(tableint internal_id) const { - return (data_level0_memory_ + internal_id * size_data_per_element_ + offsetData_); + if (use_blocks_memory_) { + return (data_level0_blocks_memory_->GetElementPtr(internal_id) + offsetData_); + } else { + return (data_level0_memory_ + internal_id * size_data_per_element_ + offsetData_); + } } @@ -266,8 +431,10 @@ class HierarchicalNSW : public AlgorithmInterface { #ifdef USE_SSE _mm_prefetch((char *) (visited_array + *(data + 1)), _MM_HINT_T0); _mm_prefetch((char *) (visited_array + *(data + 1) + 64), _MM_HINT_T0); - _mm_prefetch(getDataByInternalId(*datal), _MM_HINT_T0); - _mm_prefetch(getDataByInternalId(*(datal + 1)), _MM_HINT_T0); + if (1 < size) { + _mm_prefetch(getDataByInternalId(*datal), _MM_HINT_T0); + _mm_prefetch(getDataByInternalId(*(datal + 1)), _MM_HINT_T0); + } #endif for (size_t j = 0; j < size; j++) { @@ -275,7 +442,9 @@ class HierarchicalNSW : public AlgorithmInterface { // if (candidate_id == 0) continue; #ifdef USE_SSE _mm_prefetch((char *) (visited_array + *(datal + j + 1)), _MM_HINT_T0); - _mm_prefetch(getDataByInternalId(*(datal + j + 1)), _MM_HINT_T0); + if (j + 1 < size) { + _mm_prefetch(getDataByInternalId(*(datal + j + 1)), _MM_HINT_T0); + } #endif if (visited_array[candidate_id] == visited_array_tag) continue; visited_array[candidate_id] = visited_array_tag; @@ -304,7 +473,6 @@ class HierarchicalNSW : public AlgorithmInterface { return top_candidates; } - // bare_bone_search means there is no check 
for deletions and stop condition is ignored in return of extra performance template std::priority_queue, std::vector>, CompareByFirst> @@ -370,7 +538,13 @@ class HierarchicalNSW : public AlgorithmInterface { #ifdef USE_SSE _mm_prefetch((char *) (visited_array + *(data + 1)), _MM_HINT_T0); _mm_prefetch((char *) (visited_array + *(data + 1) + 64), _MM_HINT_T0); - _mm_prefetch(data_level0_memory_ + (*(data + 1)) * size_data_per_element_ + offsetData_, _MM_HINT_T0); + if (use_blocks_memory_) { + if (1 < size) { + _mm_prefetch(data_level0_blocks_memory_->GetElementPtr(*(data + 1)) + offsetData_, _MM_HINT_T0); + } + } else { + _mm_prefetch(data_level0_memory_ + (*(data + 1)) * size_data_per_element_ + offsetData_, _MM_HINT_T0); + } _mm_prefetch((char *) (data + 2), _MM_HINT_T0); #endif @@ -379,8 +553,14 @@ class HierarchicalNSW : public AlgorithmInterface { // if (candidate_id == 0) continue; #ifdef USE_SSE _mm_prefetch((char *) (visited_array + *(data + j + 1)), _MM_HINT_T0); - _mm_prefetch(data_level0_memory_ + (*(data + j + 1)) * size_data_per_element_ + offsetData_, - _MM_HINT_T0); //////////// + if (use_blocks_memory_) { + if (j + 1 <= size) { + _mm_prefetch(data_level0_blocks_memory_->GetElementPtr(*(data + j + 1)) + offsetData_, _MM_HINT_T0); + } + } else { + _mm_prefetch(data_level0_memory_ + (*(data + j + 1)) * size_data_per_element_ + offsetData_, + _MM_HINT_T0); //////////// + } #endif if (!(visited_array[candidate_id] == visited_array_tag)) { visited_array[candidate_id] = visited_array_tag; @@ -398,9 +578,13 @@ class HierarchicalNSW : public AlgorithmInterface { if (flag_consider_candidate) { candidate_set.emplace(-dist, candidate_id); #ifdef USE_SSE - _mm_prefetch(data_level0_memory_ + candidate_set.top().second * size_data_per_element_ + - offsetLevel0_, /////////// - _MM_HINT_T0); //////////////////////// + if (use_blocks_memory_) { + _mm_prefetch(data_level0_blocks_memory_->GetElementPtr(candidate_set.top().second) + offsetLevel0_, _MM_HINT_T0); + } else 
{ + _mm_prefetch(data_level0_memory_ + candidate_set.top().second * size_data_per_element_ + + offsetLevel0_, /////////// + _MM_HINT_T0); //////////////////////// + } #endif if (bare_bone_search || @@ -439,7 +623,6 @@ class HierarchicalNSW : public AlgorithmInterface { return top_candidates; } - void getNeighborsByHeuristic2( std::priority_queue, std::vector>, CompareByFirst> &top_candidates, const size_t M) { @@ -484,12 +667,11 @@ class HierarchicalNSW : public AlgorithmInterface { linklistsizeint *get_linklist0(tableint internal_id) const { - return (linklistsizeint *) (data_level0_memory_ + internal_id * size_data_per_element_ + offsetLevel0_); - } - - - linklistsizeint *get_linklist0(tableint internal_id, char *data_level0_memory_) const { - return (linklistsizeint *) (data_level0_memory_ + internal_id * size_data_per_element_ + offsetLevel0_); + if (use_blocks_memory_) { + return (linklistsizeint *) (data_level0_blocks_memory_->GetElementPtr(internal_id) + offsetLevel0_); + } else { + return (linklistsizeint *) (data_level0_memory_ + internal_id * size_data_per_element_ + offsetLevel0_); + } } @@ -641,10 +823,14 @@ class HierarchicalNSW : public AlgorithmInterface { std::vector(new_max_elements).swap(link_list_locks_); // Reallocate base layer - char * data_level0_memory_new = (char *) realloc(data_level0_memory_, new_max_elements * size_data_per_element_); - if (data_level0_memory_new == nullptr) - throw std::runtime_error("Not enough memory: resizeIndex failed to allocate base layer"); - data_level0_memory_ = data_level0_memory_new; + if (use_blocks_memory_) { + data_level0_blocks_memory_->Realloc(new_max_elements); + } else { + char * data_level0_memory_new = (char *) realloc(data_level0_memory_, new_max_elements * size_data_per_element_); + if (data_level0_memory_new == nullptr) + throw std::runtime_error("Not enough memory: resizeIndex failed to allocate base layer"); + data_level0_memory_ = data_level0_memory_new; + } // Reallocate all other layers char 
** linkLists_new = (char **) realloc(linkLists_, sizeof(void *) * new_max_elements); @@ -700,7 +886,19 @@ class HierarchicalNSW : public AlgorithmInterface { writeBinaryPOD(output, mult_); writeBinaryPOD(output, ef_construction_); - output.write(data_level0_memory_, cur_element_count * size_data_per_element_); + if (use_blocks_memory_) { + size_t block_index = 0; + size_t left_element = cur_element_count; + assert(max_elements_ == data_level0_blocks_memory_->Capacity()); + while (left_element > 0) { + size_t write_element_count = std::min(left_element, data_level0_blocks_memory_->ElementCountPerBlock()); + output.write(data_level0_blocks_memory_->GetMemoryBlockPtr(block_index), write_element_count * size_data_per_element_); + left_element -= write_element_count; + block_index++; + } + } else { + output.write(data_level0_memory_, cur_element_count * size_data_per_element_); + } for (size_t i = 0; i < cur_element_count; i++) { unsigned int linkListSize = element_levels_[i] > 0 ? size_links_per_element_ * element_levels_[i] : 0; @@ -773,10 +971,23 @@ class HierarchicalNSW : public AlgorithmInterface { input.seekg(pos, input.beg); - data_level0_memory_ = (char *) malloc(max_elements * size_data_per_element_); - if (data_level0_memory_ == nullptr) - throw std::runtime_error("Not enough memory: loadIndex failed to allocate level0"); - input.read(data_level0_memory_, cur_element_count * size_data_per_element_); + if (use_blocks_memory_) { + size_t block_index = 0; + size_t left_element = cur_element_count; + data_level0_blocks_memory_ = new DataLevel0BlocksMemory(size_data_per_element_); + data_level0_blocks_memory_->Malloc(max_elements); + while (left_element > 0) { + size_t read_element_count = std::min(left_element, data_level0_blocks_memory_->ElementCountPerBlock()); + input.read(data_level0_blocks_memory_->GetMemoryBlockPtr(block_index), read_element_count * size_data_per_element_); + left_element -= read_element_count; + block_index++; + } + } else { + 
data_level0_memory_ = (char *) malloc(max_elements * size_data_per_element_); + if (data_level0_memory_ == nullptr) + throw std::runtime_error("Not enough memory: loadIndex failed to allocate level0"); + input.read(data_level0_memory_, cur_element_count * size_data_per_element_); + } size_links_per_element_ = maxM_ * sizeof(tableint) + sizeof(linklistsizeint); @@ -1089,11 +1300,15 @@ class HierarchicalNSW : public AlgorithmInterface { int size = getListCount(data); tableint *datal = (tableint *) (data + 1); #ifdef USE_SSE - _mm_prefetch(getDataByInternalId(*datal), _MM_HINT_T0); + if (0 < size) { + _mm_prefetch(getDataByInternalId(*datal), _MM_HINT_T0); + } #endif for (int i = 0; i < size; i++) { #ifdef USE_SSE - _mm_prefetch(getDataByInternalId(*(datal + i + 1)), _MM_HINT_T0); + if (i + 1 < size) { + _mm_prefetch(getDataByInternalId(*(datal + i + 1)), _MM_HINT_T0); + } #endif tableint cand = datal[i]; dist_t d = fstdistfunc_(dataPoint, getDataByInternalId(cand), dist_func_param_); @@ -1196,7 +1411,11 @@ class HierarchicalNSW : public AlgorithmInterface { tableint currObj = enterpoint_node_; tableint enterpoint_copy = enterpoint_node_; - memset(data_level0_memory_ + cur_c * size_data_per_element_ + offsetLevel0_, 0, size_data_per_element_); + if (use_blocks_memory_) { + memset(data_level0_blocks_memory_->GetElementPtr(cur_c) + offsetLevel0_, 0, size_data_per_element_); + } else { + memset(data_level0_memory_ + cur_c * size_data_per_element_ + offsetLevel0_, 0, size_data_per_element_); + } // Initialisation of the data and label memcpy(getExternalLabeLp(cur_c), &label, sizeof(labeltype)); From 2a4cab5dce364587b1f9c64dcd7532c4f47f9f24 Mon Sep 17 00:00:00 2001 From: Axlgrep Date: Fri, 26 Jul 2024 04:48:03 +0800 Subject: [PATCH 2/2] add dataLevel0BlocksMemory test case --- CMakeLists.txt | 3 + tests/cpp/dataLevel0BlocksMemory_test.cpp | 175 ++++++++++++++++++++++ tests/cpp/epsilon_search_test.cpp | 18 ++- tests/cpp/multiThreadLoad_test.cpp | 21 ++- 
tests/cpp/multiThread_replace_test.cpp | 22 ++- tests/cpp/multivector_search_test.cpp | 18 ++- tests/cpp/searchKnnCloserFirst_test.cpp | 18 ++- 7 files changed, 259 insertions(+), 16 deletions(-) create mode 100644 tests/cpp/dataLevel0BlocksMemory_test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index be0d40f0..7890e049 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -100,6 +100,9 @@ if(HNSWLIB_EXAMPLES) add_executable(multiThread_replace_test tests/cpp/multiThread_replace_test.cpp) target_link_libraries(multiThread_replace_test hnswlib) + add_executable(dataLevel0BlocksMemory_test tests/cpp/dataLevel0BlocksMemory_test.cpp) + target_link_libraries(dataLevel0BlocksMemory_test hnswlib) + add_executable(main tests/cpp/main.cpp tests/cpp/sift_1b.cpp) target_link_libraries(main hnswlib) endif() diff --git a/tests/cpp/dataLevel0BlocksMemory_test.cpp b/tests/cpp/dataLevel0BlocksMemory_test.cpp new file mode 100644 index 00000000..81035328 --- /dev/null +++ b/tests/cpp/dataLevel0BlocksMemory_test.cpp @@ -0,0 +1,175 @@ +#include "../../hnswlib/hnswlib.h" + +#include + +#include +#include +#include +#include +#include + +namespace { + +const size_t M = 32; +const size_t ef_construction = 500; +const size_t random_seed = 100; +const bool allow_replace_deleted = false; + +const size_t dimension = 1024; +const size_t total_items = 100 * 10000; +const size_t num_query = 500 * 10000; +size_t topk = 10; +const size_t max_thread_num = 48; +const std::string index_path = "./hnsw.index"; + +std::vector data(total_items * dimension); +std::vector query(num_query * dimension); + + +void check_knn_closer(hnswlib::AlgorithmInterface* alg_hnsw) { + for (size_t j = 0; j < num_query; ++j) { + const void* p = query.data() + j * dimension; + auto gd = alg_hnsw->searchKnn(p, topk); + auto res = alg_hnsw->searchKnnCloserFirst(p, topk); + assert(gd.size() == res.size()); + size_t t = gd.size(); + while (!gd.empty()) { + assert(gd.top() == res[--t]); + gd.pop(); + } + } + std::cout << 
"test hnsw search knn closer first success..." << std::endl; +} + +void test_compatibility(bool hnsw_first_use_blocks_memory, + bool hnsw_second_use_blocks_memory) { + + std::cout << "================== test compatibility ==================" << std::endl; + hnswlib::L2Space space(dimension); + hnswlib::AlgorithmInterface* alg_hnsw_first = new hnswlib::HierarchicalNSW(&space, 2 * total_items, + M, ef_construction, random_seed, allow_replace_deleted, hnsw_first_use_blocks_memory); + + for (size_t i = 0; i < total_items; ++i) { + alg_hnsw_first->addPoint(data.data() + dimension * i, i); + } + check_knn_closer(alg_hnsw_first); + + // save hnsw index + std::remove(index_path.data()); + alg_hnsw_first->saveIndex(index_path); + std::cout << "save hnsw(use_small_blocks_memory = " << hnsw_first_use_blocks_memory << ") index success" << std::endl; + delete alg_hnsw_first; + + // load hnsw index + hnswlib::AlgorithmInterface* alg_hnsw_second = new hnswlib::HierarchicalNSW(&space, false, + 0, allow_replace_deleted, hnsw_second_use_blocks_memory); + std::cout << "load hnsw(use_small_blocks_memory = " << hnsw_second_use_blocks_memory << ") index success" << std::endl; + std::remove(index_path.data()); + check_knn_closer(alg_hnsw_second); + + delete alg_hnsw_second; +} + +void test_performace(bool use_small_blocks_memory) { + if (total_items == 0) { + return; + } + + std::cout << "================== test preformace(" + << "dimension: " << dimension + << ", M: " << M + << ", ef_construction: " << ef_construction + << ", topk: " << topk + << ", use_small_blocks_memory: " << (use_small_blocks_memory ? "ture" : "false" ) + << ") ==================" << std::endl; + hnswlib::L2Space space(dimension); + hnswlib::HierarchicalNSW* alg_hnsw = new hnswlib::HierarchicalNSW(&space, 2 * total_items, + M, ef_construction, random_seed, allow_replace_deleted, use_small_blocks_memory); + + std::vector threads; + size_t num_threads = (total_items >= max_thread_num ? 
max_thread_num : total_items); + size_t batch_num = (total_items / (num_threads <= 1 ? 1 : (num_threads - 1))) + 1; + auto start_time = std::chrono::system_clock::now(); + for (size_t idx = 0; idx < total_items; idx += batch_num) { + size_t start = idx; + size_t end = std::min(idx + batch_num, total_items); + threads.push_back( + std::thread( + [alg_hnsw, start, end] { + for (size_t i = start; i < end; i++) { + alg_hnsw->addPoint(data.data() + i * dimension, i); + } + } + ) + ); + } + for (auto &thread : threads) { + thread.join(); + } + threads.clear(); + auto end_time = std::chrono::system_clock::now(); + double duration_in_ms = std::chrono::duration_cast(end_time - start_time).count(); + double duration_in_seconds = static_cast((std::chrono::duration_cast(end_time - start_time)).count()) / 1000.0; + size_t qps = (duration_in_seconds == 0 ? total_items : total_items / duration_in_seconds); + double latency = (total_items == 0 ? 0 : duration_in_ms / total_items); + std::cout << "Start " << num_threads << " thread to add " << total_items << " items to hnsw index, cost " + << duration_in_seconds << " seconds, qps: " << qps << ", latency: " << latency << "ms" << std::endl; + + + num_threads = (num_query >= max_thread_num ? max_thread_num : num_query); + batch_num = (num_query / (num_threads <= 1 ? 
1 : (num_threads - 1))) + 1; + start_time = std::chrono::system_clock::now(); + for (size_t idx = 0; idx < num_query; idx += batch_num) { + size_t start = idx; + size_t end = std::min(idx + batch_num, num_query); + threads.push_back( + std::thread( + [alg_hnsw, start, end] { + for (size_t i = start; i < end; i++) { + const void* p = query.data() + i * dimension; + auto gd = alg_hnsw->searchKnn(p, topk); + } + } + ) + ); + } + for (auto &thread : threads) { + thread.join(); + } + threads.clear(); + end_time = std::chrono::system_clock::now(); + duration_in_ms = std::chrono::duration_cast(end_time - start_time).count(); + duration_in_seconds = static_cast((std::chrono::duration_cast(end_time - start_time)).count()) / 1000.0; + qps = (duration_in_seconds == 0 ? num_query : num_query / duration_in_seconds); + latency = (num_query == 0 ? 0 : duration_in_ms / num_query); + std::cout << "Start " << num_threads << " thread to exec " << num_query << " searchKnn, cost " + << duration_in_seconds << " seconds, qps: " << qps << ", latency: " << latency << "ms" << std::endl; + + delete alg_hnsw; +} + +} // namespace + +int main() { + + std::mt19937 rng; + rng.seed(47); + std::uniform_real_distribution<> distrib; + + for (size_t i = 0; i < total_items * dimension; ++i) { + data[i] = distrib(rng); + } + for (size_t i = 0; i < num_query * dimension; ++i) { + query[i] = distrib(rng); + } + + test_compatibility(true, true); + test_compatibility(false, false); + test_compatibility(true, false); + test_compatibility(false, true); + + test_performace(true); + test_performace(false); + + return 0; +} diff --git a/tests/cpp/epsilon_search_test.cpp b/tests/cpp/epsilon_search_test.cpp index 38df6246..6a539515 100644 --- a/tests/cpp/epsilon_search_test.cpp +++ b/tests/cpp/epsilon_search_test.cpp @@ -4,12 +4,14 @@ typedef unsigned int docidtype; typedef float dist_t; -int main() { +void test(bool use_small_blocks_memory) { int dim = 16; // Dimension of the elements int max_elements = 10000; 
// Maximum number of elements, should be known beforehand int M = 16; // Tightly connected with internal dimensionality of the data // strongly affects the memory consumption int ef_construction = 200; // Controls index search speed/build speed tradeoff + size_t random_seed = 100; + bool allow_replace_deleted = false; int num_queries = 100; float epsilon2 = 1.0; // Squared distance to query @@ -20,7 +22,8 @@ int main() { // Initing index hnswlib::L2Space space(dim); hnswlib::BruteforceSearch* alg_brute = new hnswlib::BruteforceSearch(&space, max_elements); - hnswlib::HierarchicalNSW* alg_hnsw = new hnswlib::HierarchicalNSW(&space, max_elements, M, ef_construction); + hnswlib::HierarchicalNSW* alg_hnsw = new hnswlib::HierarchicalNSW(&space, max_elements, + M, ef_construction, random_seed, allow_replace_deleted, use_small_blocks_memory); // Generate random data std::mt19937 rng; @@ -110,5 +113,14 @@ int main() { delete[] data; delete alg_brute; delete alg_hnsw; - return 0; +} + +int main() { + std::cout << "Testing with use default memory allocator..." << std::endl; + test(false); + std::cout << "Test ok" << std::endl; + + std::cout << "Testing with use block memory allocator..." 
<< std::endl; + test(true); + std::cout << "Test ok" << std::endl; } diff --git a/tests/cpp/multiThreadLoad_test.cpp b/tests/cpp/multiThreadLoad_test.cpp index 4d2b4aa2..7924b1a2 100644 --- a/tests/cpp/multiThreadLoad_test.cpp +++ b/tests/cpp/multiThreadLoad_test.cpp @@ -3,8 +3,13 @@ #include -int main() { +void test(bool use_small_blocks_memory) { std::cout << "Running multithread load test" << std::endl; + size_t M = 16; + size_t ef_construction = 200; + size_t random_seed = 100; + bool allow_replace_deleted = false; + int d = 16; int max_elements = 1000; @@ -13,7 +18,8 @@ int main() { std::uniform_real_distribution<> distrib_real; hnswlib::L2Space space(d); - hnswlib::HierarchicalNSW* alg_hnsw = new hnswlib::HierarchicalNSW(&space, 2 * max_elements); + hnswlib::HierarchicalNSW* alg_hnsw = new hnswlib::HierarchicalNSW(&space, 2 * max_elements, + M, ef_construction, random_seed, allow_replace_deleted, use_small_blocks_memory); std::cout << "Building index" << std::endl; int num_threads = 40; @@ -136,5 +142,14 @@ int main() { } std::cout << "Finish" << std::endl; - return 0; +} + +int main() { + std::cout << "Testing with use default memory allocator..." << std::endl; + test(false); + std::cout << "Test ok" << std::endl; + + std::cout << "Testing with use block memory allocator..." 
<< std::endl; + test(true); + std::cout << "Test ok" << std::endl; } diff --git a/tests/cpp/multiThread_replace_test.cpp b/tests/cpp/multiThread_replace_test.cpp index 203cdb0d..567ce3df 100644 --- a/tests/cpp/multiThread_replace_test.cpp +++ b/tests/cpp/multiThread_replace_test.cpp @@ -58,13 +58,19 @@ inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn } -int main() { +void test(bool use_small_blocks_memory) { std::cout << "Running multithread load test" << std::endl; int d = 16; int num_elements = 1000; int max_elements = 2 * num_elements; int num_threads = 50; + int M = 16; // Tightly connected with internal dimensionality of the data + // strongly affects the memory consumption + int ef_construction = 200; // Controls index search speed/build speed tradeoff + size_t random_seed = 100; + bool allow_replace_deleted = true; + std::mt19937 rng; rng.seed(47); std::uniform_real_distribution<> distrib_real; @@ -90,7 +96,7 @@ int main() { int iter = 0; while (iter < 200) { - hnswlib::HierarchicalNSW* alg_hnsw = new hnswlib::HierarchicalNSW(&space, max_elements, 16, 200, 123, true); + hnswlib::HierarchicalNSW* alg_hnsw = new hnswlib::HierarchicalNSW(&space, max_elements, M, ef_construction, 123, true, use_small_blocks_memory); // add batch1 data ParallelFor(0, max_elements, num_threads, [&](size_t row, size_t threadId) { @@ -117,5 +123,15 @@ int main() { delete[] batch1; delete[] batch2; - return 0; +} + + +int main() { + std::cout << "Testing with use default memory allocator..." << std::endl; + test(false); + std::cout << "Test ok" << std::endl; + + std::cout << "Testing with use block memory allocator..." 
<< std::endl; + test(true); + std::cout << "Test ok" << std::endl; } diff --git a/tests/cpp/multivector_search_test.cpp b/tests/cpp/multivector_search_test.cpp index be783176..bb2817bc 100644 --- a/tests/cpp/multivector_search_test.cpp +++ b/tests/cpp/multivector_search_test.cpp @@ -4,12 +4,14 @@ typedef unsigned int docidtype; typedef float dist_t; -int main() { +void test(bool use_small_blocks_memory) { int dim = 16; // Dimension of the elements int max_elements = 1000; // Maximum number of elements, should be known beforehand int M = 16; // Tightly connected with internal dimensionality of the data // strongly affects the memory consumption int ef_construction = 200; // Controls index search speed/build speed tradeoff + size_t random_seed = 100; + bool allow_replace_deleted = false; int num_queries = 100; int num_docs = 10; // Number of documents to search @@ -21,7 +23,8 @@ int main() { // Initing index hnswlib::MultiVectorL2Space space(dim); hnswlib::BruteforceSearch* alg_brute = new hnswlib::BruteforceSearch(&space, max_elements); - hnswlib::HierarchicalNSW* alg_hnsw = new hnswlib::HierarchicalNSW(&space, max_elements, M, ef_construction); + hnswlib::HierarchicalNSW* alg_hnsw = new hnswlib::HierarchicalNSW(&space, max_elements, + M, ef_construction, random_seed, allow_replace_deleted, use_small_blocks_memory); // Generate random data std::mt19937 rng; @@ -122,5 +125,14 @@ int main() { delete[] data; delete alg_brute; delete alg_hnsw; - return 0; +} + +int main() { + std::cout << "Testing with use default memory allocator..." << std::endl; + test(false); + std::cout << "Test ok" << std::endl; + + std::cout << "Testing with use block memory allocator..." 
<< std::endl; + test(true); + std::cout << "Test ok" << std::endl; } diff --git a/tests/cpp/searchKnnCloserFirst_test.cpp b/tests/cpp/searchKnnCloserFirst_test.cpp index 9583fe22..a48d9086 100644 --- a/tests/cpp/searchKnnCloserFirst_test.cpp +++ b/tests/cpp/searchKnnCloserFirst_test.cpp @@ -14,7 +14,12 @@ namespace { using idx_t = hnswlib::labeltype; -void test() { +void test(bool use_small_blocks_memory) { + size_t M = 16; + size_t ef_construction = 200; + size_t random_seed = 100; + bool allow_replace_deleted = false; + int d = 4; idx_t n = 100; idx_t nq = 10; @@ -36,7 +41,8 @@ void test() { hnswlib::L2Space space(d); hnswlib::AlgorithmInterface* alg_brute = new hnswlib::BruteforceSearch(&space, 2 * n); - hnswlib::AlgorithmInterface* alg_hnsw = new hnswlib::HierarchicalNSW(&space, 2 * n); + hnswlib::AlgorithmInterface* alg_hnsw = new hnswlib::HierarchicalNSW(&space, 2 * n, + M, ef_construction, random_seed, allow_replace_deleted, use_small_blocks_memory); for (size_t i = 0; i < n; ++i) { alg_brute->addPoint(data.data() + d * i, i); @@ -74,8 +80,12 @@ void test() { } // namespace int main() { - std::cout << "Testing ..." << std::endl; - test(); + std::cout << "Testing with use default memory allocator..." << std::endl; + test(false); + std::cout << "Test ok" << std::endl; + + std::cout << "Testing with use block memory allocator..." << std::endl; + test(true); std::cout << "Test ok" << std::endl; return 0;