Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support DataLevel0BlocksMemory data struct #577

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ if(HNSWLIB_EXAMPLES)
add_executable(multiThread_replace_test tests/cpp/multiThread_replace_test.cpp)
target_link_libraries(multiThread_replace_test hnswlib)

add_executable(dataLevel0BlocksMemory_test tests/cpp/dataLevel0BlocksMemory_test.cpp)
target_link_libraries(dataLevel0BlocksMemory_test hnswlib)

add_executable(main tests/cpp/main.cpp tests/cpp/sift_1b.cpp)
target_link_libraries(main hnswlib)
endif()
303 changes: 261 additions & 42 deletions hnswlib/hnswalg.h

Large diffs are not rendered by default.

175 changes: 175 additions & 0 deletions tests/cpp/dataLevel0BlocksMemory_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
#include "../../hnswlib/hnswlib.h"

#include <assert.h>

#include <vector>
#include <iostream>
#include <cstdio>
#include <thread>
#include <chrono>

namespace {

// Index construction arguments; passed in this order (after the element
// capacity) to every hnswlib::HierarchicalNSW<float> built in this file.
const size_t M = 32;
const size_t ef_construction = 500;
const size_t random_seed = 100;
const bool allow_replace_deleted = false;

// Workload size. `dimension` is the number of floats per vector (also given
// to hnswlib::L2Space); `total_items` (1,000,000) vectors are inserted and
// `num_query` (5,000,000) vectors are searched.
const size_t dimension = 1024;
const size_t total_items = 100 * 10000;
const size_t num_query = 500 * 10000;
Comment on lines +18 to +20
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With such parameters, it takes a lot of time to run the test.

// Number of neighbours requested from every searchKnn / searchKnnCloserFirst
// call. Declared const like the other tuning constants; nothing in this file
// modifies it.
const size_t topk = 10;
// Upper bound on the worker threads started per phase in the performance test.
const size_t max_thread_num = 48;
// File the compatibility test saves the index to (and removes afterwards).
const std::string index_path = "./hnsw.index";

// Flat row-major buffers: vector i occupies [i * dimension, (i + 1) * dimension).
// Sized at static-initialisation time and filled with random values in main().
std::vector<float> data(total_items * dimension);
std::vector<float> query(num_query * dimension);


// Cross-checks searchKnn against searchKnnCloserFirst for every query vector.
//
// For each query both calls are issued with the same `topk`. The results must
// have the same size, and popping the searchKnn queue must yield the
// searchKnnCloserFirst vector read from its last element to its first.
// The checks use assert(), so they are compiled out when NDEBUG is defined.
//
// alg_hnsw: index to query; not owned by this function.
void check_knn_closer(hnswlib::AlgorithmInterface<float>* alg_hnsw) {
    size_t query_index = 0;
    while (query_index < num_query) {
        const void* query_vector = query.data() + query_index * dimension;

        auto queue_result = alg_hnsw->searchKnn(query_vector, topk);
        auto ordered_result = alg_hnsw->searchKnnCloserFirst(query_vector, topk);
        assert(queue_result.size() == ordered_result.size());

        // Walk the ordered vector backwards while draining the queue.
        for (size_t pos = queue_result.size(); !queue_result.empty(); queue_result.pop()) {
            assert(queue_result.top() == ordered_result[--pos]);
        }

        ++query_index;
    }
    std::cout << "test hnsw search knn closer first success..." << std::endl;
}

// Save/load round trip between the two level-0 memory layouts.
//
// Builds an index from `data`, checks it with check_knn_closer, saves it to
// `index_path`, destroys it, loads the file into a second index and checks
// that one as well. The index file is removed before saving and after loading.
//
// hnsw_first_use_blocks_memory:  last constructor argument of the index that
//                                is built and saved.
// hnsw_second_use_blocks_memory: last constructor argument of the index that
//                                is loaded from the saved file.
void test_compatibility(bool hnsw_first_use_blocks_memory,
                        bool hnsw_second_use_blocks_memory) {
    std::cout << "================== test compatibility ==================" << std::endl;
    hnswlib::L2Space space(dimension);

    {
        // Scoped so the built index is destroyed (and its memory released)
        // before the saved file is loaded, on every exit path.
        hnswlib::HierarchicalNSW<float> alg_hnsw_first(&space, 2 * total_items,
            M, ef_construction, random_seed, allow_replace_deleted, hnsw_first_use_blocks_memory);

        for (size_t i = 0; i < total_items; ++i) {
            alg_hnsw_first.addPoint(data.data() + dimension * i, i);
        }
        check_knn_closer(&alg_hnsw_first);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what we are supposed to check here. searchKnnCloserFirst only calls searchKnn and rearranges the found elements."


        // save hnsw index
        std::remove(index_path.c_str());
        alg_hnsw_first.saveIndex(index_path);
        std::cout << "save hnsw(use_small_blocks_memory = " << hnsw_first_use_blocks_memory << ") index success" << std::endl;
    }

    // load hnsw index
    // The path argument selects the loading constructor; without it the
    // remaining arguments bind to the build constructor and an empty index is
    // created. NOTE(review): assumes the loading constructor is
    // (space, location, nmslib, max_elements, allow_replace_deleted,
    // use_small_blocks_memory) - confirm against hnswalg.h in this PR.
    hnswlib::HierarchicalNSW<float> alg_hnsw_second(&space, index_path, false,
        0, allow_replace_deleted, hnsw_second_use_blocks_memory);
Comment on lines +63 to +65
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here we don't load index. We initialize empty index (max_elements=0). I think something is missing here

    std::cout << "load hnsw(use_small_blocks_memory = " << hnsw_second_use_blocks_memory << ") index success" << std::endl;
    std::remove(index_path.c_str());
    check_knn_closer(&alg_hnsw_second);
}

// Result of one multi-threaded phase.
struct PhaseTiming {
    size_t threads_started;  // worker threads actually launched
    double seconds;          // time from first launch until the last join
};

// Runs per_item(i) for every i in [0, item_count), split into contiguous
// batches with one thread per batch (at most max_thread_num threads), and
// returns how many threads ran and how long the phase took.
// Timing uses steady_clock so wall-clock adjustments cannot skew the interval.
template <typename PerItem>
PhaseTiming run_phase(size_t item_count, PerItem per_item) {
    PhaseTiming timing{0, 0.0};
    if (item_count == 0) {
        return timing;
    }

    size_t wanted_threads = item_count < max_thread_num ? item_count : max_thread_num;
    if (wanted_threads == 0) {
        wanted_threads = 1;  // guards the division below if max_thread_num is 0
    }
    // Ceiling division: every item is covered by at most wanted_threads batches.
    const size_t batch_size = (item_count + wanted_threads - 1) / wanted_threads;

    std::vector<std::thread> workers;
    workers.reserve(wanted_threads);
    const auto started_at = std::chrono::steady_clock::now();
    for (size_t first = 0; first < item_count; first += batch_size) {
        const size_t last = (item_count - first < batch_size) ? item_count : first + batch_size;
        workers.emplace_back([per_item, first, last] {
            for (size_t i = first; i < last; ++i) {
                per_item(i);
            }
        });
    }
    for (auto& worker : workers) {
        worker.join();
    }
    const auto finished_at = std::chrono::steady_clock::now();

    timing.threads_started = workers.size();
    timing.seconds = std::chrono::duration<double>(finished_at - started_at).count();
    return timing;
}

// Prints the one-line summary for a phase: thread count, total time,
// throughput (items per second) and mean latency per item in milliseconds.
void report_phase(const PhaseTiming& timing, size_t item_count, const char* verb, const char* noun) {
    const size_t qps = timing.seconds > 0
        ? static_cast<size_t>(item_count / timing.seconds)
        : item_count;
    const double latency_ms = item_count == 0 ? 0.0 : timing.seconds * 1000.0 / item_count;
    std::cout << "Start " << timing.threads_started << " thread to " << verb << " " << item_count << noun
              << ", cost " << timing.seconds << " seconds, qps: " << qps
              << ", latency: " << latency_ms << "ms" << std::endl;
}

// Measures multi-threaded build and search throughput.
//
// Inserts all `total_items` vectors from `data`, then runs searchKnn for all
// `num_query` vectors from `query`, and prints one summary line per phase.
// Does nothing when total_items is 0.
//
// use_small_blocks_memory: forwarded as the last constructor argument of the
//                          index under test.
void test_performace(bool use_small_blocks_memory) {
    if (total_items == 0) {
        return;
    }

    std::cout << "================== test performance("
              << "dimension: " << dimension
              << ", M: " << M
              << ", ef_construction: " << ef_construction
              << ", topk: " << topk
              << ", use_small_blocks_memory: " << (use_small_blocks_memory ? "true" : "false")
              << ") ==================" << std::endl;

    hnswlib::L2Space space(dimension);
    // Automatic storage: the index is released on every exit path.
    hnswlib::HierarchicalNSW<float> alg_hnsw(&space, 2 * total_items,
        M, ef_construction, random_seed, allow_replace_deleted, use_small_blocks_memory);

    // Build phase: item i is inserted with label i.
    const PhaseTiming build = run_phase(total_items, [&alg_hnsw](size_t i) {
        alg_hnsw.addPoint(data.data() + i * dimension, i);
    });
    report_phase(build, total_items, "add", " items to hnsw index");

    // Search phase: results are discarded, only the elapsed time matters.
    const PhaseTiming search = run_phase(num_query, [&alg_hnsw](size_t i) {
        const void* query_vector = query.data() + i * dimension;
        auto found = alg_hnsw.searchKnn(query_vector, topk);
        (void)found;
    });
    report_phase(search, num_query, "exec", " searchKnn");
}

} // namespace

// Entry point: fills the global `data` and `query` buffers with pseudo-random
// values, runs the save/load compatibility test for all four combinations of
// the two memory-layout flags, then runs the performance test for each layout.
int main() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please add a test to check the resizeIndex function as well?
For example, we can call resizeIndex multiple times and then verify that the recall is high and all elements' data is valid in the index.


// Fixed seed so every run generates the same vectors.
std::mt19937 rng;
rng.seed(47);
// Default-constructed distribution: yields double values, which are
// converted to float when stored in the buffers below.
std::uniform_real_distribution<> distrib;

for (size_t i = 0; i < total_items * dimension; ++i) {
data[i] = distrib(rng);
}
for (size_t i = 0; i < num_query * dimension; ++i) {
query[i] = distrib(rng);
}

// Arguments: (flag for the index that is built and saved,
//             flag for the index created afterwards for the reload check).
test_compatibility(true, true);
test_compatibility(false, false);
test_compatibility(true, false);
test_compatibility(false, true);

// Argument: use_small_blocks_memory for the index under test.
test_performace(true);
test_performace(false);

return 0;
}
18 changes: 15 additions & 3 deletions tests/cpp/epsilon_search_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
typedef unsigned int docidtype;
typedef float dist_t;

int main() {
void test(bool use_small_blocks_memory) {
int dim = 16; // Dimension of the elements
int max_elements = 10000; // Maximum number of elements, should be known beforehand
int M = 16; // Tightly connected with internal dimensionality of the data
// strongly affects the memory consumption
int ef_construction = 200; // Controls index search speed/build speed tradeoff
size_t random_seed = 100;
bool allow_replace_deleted = false;

int num_queries = 100;
float epsilon2 = 1.0; // Squared distance to query
Expand All @@ -20,7 +22,8 @@ int main() {
// Initing index
hnswlib::L2Space space(dim);
hnswlib::BruteforceSearch<dist_t>* alg_brute = new hnswlib::BruteforceSearch<dist_t>(&space, max_elements);
hnswlib::HierarchicalNSW<dist_t>* alg_hnsw = new hnswlib::HierarchicalNSW<dist_t>(&space, max_elements, M, ef_construction);
hnswlib::HierarchicalNSW<dist_t>* alg_hnsw = new hnswlib::HierarchicalNSW<dist_t>(&space, max_elements,
M, ef_construction, random_seed, allow_replace_deleted, use_small_blocks_memory);

// Generate random data
std::mt19937 rng;
Expand Down Expand Up @@ -110,5 +113,14 @@ int main() {
delete[] data;
delete alg_brute;
delete alg_hnsw;
return 0;
}

// Entry point: runs test() once with the default allocator, then once with
// the block memory allocator, announcing each run and its completion.
int main() {
    const bool allocator_modes[] = {false, true};
    for (const bool use_blocks : allocator_modes) {
        const char* banner = use_blocks
            ? "Testing with use block memory allocator..."
            : "Testing with use default memory allocator...";
        std::cout << banner << std::endl;
        test(use_blocks);
        std::cout << "Test ok" << std::endl;
    }
}
21 changes: 18 additions & 3 deletions tests/cpp/multiThreadLoad_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,13 @@
#include <chrono>


int main() {
void test(bool use_small_blocks_memory) {
std::cout << "Running multithread load test" << std::endl;
size_t M = 16;
size_t ef_construction = 200;
size_t random_seed = 100;
bool allow_replace_deleted = false;

int d = 16;
int max_elements = 1000;

Expand All @@ -13,7 +18,8 @@ int main() {
std::uniform_real_distribution<> distrib_real;

hnswlib::L2Space space(d);
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, 2 * max_elements);
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, 2 * max_elements,
M, ef_construction, random_seed, allow_replace_deleted, use_small_blocks_memory);

std::cout << "Building index" << std::endl;
int num_threads = 40;
Expand Down Expand Up @@ -136,5 +142,14 @@ int main() {
}

std::cout << "Finish" << std::endl;
return 0;
}

// Entry point: runs test() once with the default allocator, then once with
// the block memory allocator, announcing each run and its completion.
int main() {
    const bool allocator_modes[] = {false, true};
    for (const bool use_blocks : allocator_modes) {
        const char* banner = use_blocks
            ? "Testing with use block memory allocator..."
            : "Testing with use default memory allocator...";
        std::cout << banner << std::endl;
        test(use_blocks);
        std::cout << "Test ok" << std::endl;
    }
}
22 changes: 19 additions & 3 deletions tests/cpp/multiThread_replace_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,19 @@ inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn
}


int main() {
void test(bool use_small_blocks_memory) {
std::cout << "Running multithread load test" << std::endl;
int d = 16;
int num_elements = 1000;
int max_elements = 2 * num_elements;
int num_threads = 50;

int M = 16; // Tightly connected with internal dimensionality of the data
// strongly affects the memory consumption
int ef_construction = 200; // Controls index search speed/build speed tradeoff
size_t random_seed = 100;
bool allow_replace_deleted = true;

std::mt19937 rng;
rng.seed(47);
std::uniform_real_distribution<> distrib_real;
Expand All @@ -90,7 +96,7 @@ int main() {

int iter = 0;
while (iter < 200) {
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, 16, 200, 123, true);
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction, 123, true, use_small_blocks_memory);

// add batch1 data
ParallelFor(0, max_elements, num_threads, [&](size_t row, size_t threadId) {
Expand All @@ -117,5 +123,15 @@ int main() {

delete[] batch1;
delete[] batch2;
return 0;
}


// Entry point: runs test() once with the default allocator, then once with
// the block memory allocator, announcing each run and its completion.
int main() {
    const bool allocator_modes[] = {false, true};
    for (const bool use_blocks : allocator_modes) {
        const char* banner = use_blocks
            ? "Testing with use block memory allocator..."
            : "Testing with use default memory allocator...";
        std::cout << banner << std::endl;
        test(use_blocks);
        std::cout << "Test ok" << std::endl;
    }
}
18 changes: 15 additions & 3 deletions tests/cpp/multivector_search_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
typedef unsigned int docidtype;
typedef float dist_t;

int main() {
void test(bool use_small_blocks_memory) {
int dim = 16; // Dimension of the elements
int max_elements = 1000; // Maximum number of elements, should be known beforehand
int M = 16; // Tightly connected with internal dimensionality of the data
// strongly affects the memory consumption
int ef_construction = 200; // Controls index search speed/build speed tradeoff
size_t random_seed = 100;
bool allow_replace_deleted = false;

int num_queries = 100;
int num_docs = 10; // Number of documents to search
Expand All @@ -21,7 +23,8 @@ int main() {
// Initing index
hnswlib::MultiVectorL2Space<docidtype> space(dim);
hnswlib::BruteforceSearch<dist_t>* alg_brute = new hnswlib::BruteforceSearch<dist_t>(&space, max_elements);
hnswlib::HierarchicalNSW<dist_t>* alg_hnsw = new hnswlib::HierarchicalNSW<dist_t>(&space, max_elements, M, ef_construction);
hnswlib::HierarchicalNSW<dist_t>* alg_hnsw = new hnswlib::HierarchicalNSW<dist_t>(&space, max_elements,
M, ef_construction, random_seed, allow_replace_deleted, use_small_blocks_memory);

// Generate random data
std::mt19937 rng;
Expand Down Expand Up @@ -122,5 +125,14 @@ int main() {
delete[] data;
delete alg_brute;
delete alg_hnsw;
return 0;
}

// Entry point: runs test() once with the default allocator, then once with
// the block memory allocator, announcing each run and its completion.
int main() {
    const bool allocator_modes[] = {false, true};
    for (const bool use_blocks : allocator_modes) {
        const char* banner = use_blocks
            ? "Testing with use block memory allocator..."
            : "Testing with use default memory allocator...";
        std::cout << banner << std::endl;
        test(use_blocks);
        std::cout << "Test ok" << std::endl;
    }
}
18 changes: 14 additions & 4 deletions tests/cpp/searchKnnCloserFirst_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@ namespace {

using idx_t = hnswlib::labeltype;

void test() {
void test(bool use_small_blocks_memory) {
size_t M = 16;
size_t ef_construction = 200;
size_t random_seed = 100;
bool allow_replace_deleted = false;

int d = 4;
idx_t n = 100;
idx_t nq = 10;
Expand All @@ -36,7 +41,8 @@ void test() {

hnswlib::L2Space space(d);
hnswlib::AlgorithmInterface<float>* alg_brute = new hnswlib::BruteforceSearch<float>(&space, 2 * n);
hnswlib::AlgorithmInterface<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, 2 * n);
hnswlib::AlgorithmInterface<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, 2 * n,
M, ef_construction, random_seed, allow_replace_deleted, use_small_blocks_memory);

for (size_t i = 0; i < n; ++i) {
alg_brute->addPoint(data.data() + d * i, i);
Expand Down Expand Up @@ -74,8 +80,12 @@ void test() {
} // namespace

int main() {
std::cout << "Testing ..." << std::endl;
test();
std::cout << "Testing with use default memory allocator..." << std::endl;
test(false);
std::cout << "Test ok" << std::endl;

std::cout << "Testing with use block memory allocator..." << std::endl;
test(true);
std::cout << "Test ok" << std::endl;

return 0;
Expand Down
Loading