From 0e4ee9faa348c9dc2a5e5c11498f6cab4129df39 Mon Sep 17 00:00:00 2001 From: achirkin Date: Fri, 13 Jan 2023 15:02:11 +0100 Subject: [PATCH] Tweak the sensitivity for more test cases --- cpp/test/neighbors/ann_ivf_pq.cuh | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh index 42f500f2d9..a3119f761c 100644 --- a/cpp/test/neighbors/ann_ivf_pq.cuh +++ b/cpp/test/neighbors/ann_ivf_pq.cuh @@ -236,15 +236,16 @@ class ivf_pq_test : public ::testing::TestWithParam { update_host(indices_ivf_pq.data(), indices_ivf_pq_dev.data(), queries_size, stream_); handle_.sync_stream(stream_); - // Using very dense, small codebooks results in large errors in the distance calculation - double low_precision_factor = - static_cast(ps.dim * 8) / static_cast(index.pq_dim() * index.pq_bits()); // A very conservative lower bound on recall double min_recall = static_cast(ps.search_params.n_probes) / static_cast(ps.index_params.n_lists); + double low_precision_factor = + static_cast(ps.dim * 8) / static_cast(index.pq_dim() * index.pq_bits()); + // Using a heuristic to lower the required recall due to code-packing errors + min_recall = + std::min(std::erfc(0.1 * low_precision_factor / std::max(min_recall, 0.5)), min_recall); // Use explicit per-test min recall value if provided. - // TODO: investigate what prevents 100% - min_recall = std::min(1 - 0.01 * low_precision_factor, ps.min_recall.value_or(min_recall)); + min_recall = ps.min_recall.value_or(min_recall); ASSERT_TRUE(eval_neighbours(indices_ref, indices_ivf_pq, @@ -394,7 +395,7 @@ inline auto big_dims_small_lut() -> test_cases_t y.index_params.pq_dim = round_up_safe(div_rounding_up_safe(x.dim, pq_len), 4u); y.index_params.pq_bits = 6; y.search_params.lut_dtype = CUDA_R_8U; - y.min_recall = 0.21; + y.min_recall = 0.2; return y; }); } @@ -422,7 +423,7 @@ inline auto enum_variety() -> test_cases_t ADD_CASE({ x.index_params.codebook_kind = ivf_pq::codebook_gen::PER_CLUSTER; x.index_params.pq_bits = 4; - x.min_recall = 0.74; + x.min_recall = 0.73; }); ADD_CASE({ x.index_params.codebook_kind = ivf_pq::codebook_gen::PER_CLUSTER; @@ -491,8 +492,15 @@ inline auto enum_variety_ip() -> test_cases_t return map(enum_variety(), [](const ivf_pq_inputs& x) { ivf_pq_inputs y(x); if (y.min_recall.has_value()) { - // Apparently InnerProduct tends to give higher scores for these parameter values. - y.min_recall = y.min_recall.value() * 1.15; + if (y.search_params.lut_dtype == CUDA_R_8U) { + // InnerProduct score is signed, + // thus we're forced to used signed 8-bit representation, + // thus we have one bit less precision + y.min_recall = y.min_recall.value() * 0.95; + } else { + // Apparently InnerProduct tends to give higher scores for these parameter values. + y.min_recall = y.min_recall.value() * 1.12; + } } y.index_params.metric = distance::DistanceType::InnerProduct; return y;