========================================================================= FAILURES =========================================================================
____________________________________________ test_knn_separate_index_search[braycurtis-50-1000-500000-ndarray] _____________________________________________

input_type = 'ndarray', nrows = 500000, n_feats = 1000, k = 50, metric = 'braycurtis'

    @pytest.mark.parametrize('input_type', ['dataframe', 'ndarray'])
    @pytest.mark.parametrize('nrows', [unit_param(500), quality_param(5000), stress_param(500000)])
    @pytest.mark.parametrize('n_feats', [unit_param(3), quality_param(100), stress_param(1000)])
    @pytest.mark.parametrize('k', [unit_param(3), quality_param(30), stress_param(50)])
    @pytest.mark.parametrize("metric", valid_metrics())
    def test_knn_separate_index_search(input_type, nrows, n_feats, k, metric):
        X, _ = make_blobs(n_samples=nrows, n_features=n_feats, random_state=0)

        X_index = X[:100]
        X_search = X[101:]

        p = 5  # Testing 5-norm of the minkowski metric only
        knn_sk = skKNN(metric=metric, p=p)  # Testing
        knn_sk.fit(X_index)
        D_sk, I_sk = knn_sk.kneighbors(X_search, k)

        X_orig = X_index

        if input_type == "dataframe":
            X_index = cudf.DataFrame(X_index)
            X_search = cudf.DataFrame(X_search)

        knn_cu = cuKNN(metric=metric, p=p)
        knn_cu.fit(X_index)
        D_cuml, I_cuml = knn_cu.kneighbors(X_search, k)

        if input_type == "dataframe":
            assert isinstance(D_cuml, cudf.DataFrame)
            assert isinstance(I_cuml, cudf.DataFrame)
            D_cuml_arr = D_cuml.as_gpu_matrix().copy_to_host()
            I_cuml_arr = I_cuml.as_gpu_matrix().copy_to_host()
        else:
            assert isinstance(D_cuml, np.ndarray)
            assert isinstance(I_cuml, np.ndarray)
            D_cuml_arr = D_cuml
            I_cuml_arr = I_cuml

        # Assert the cuml model was properly reverted
        np.testing.assert_allclose(knn_cu._X_m.to_output("numpy"), X_orig, atol=1e-3, rtol=1e-3)

        if metric == 'braycurtis':
            diff = D_cuml_arr - D_sk
            # Braycurtis has a few differences, but this is computed by FAISS.
            # So long as the indices all match below, the small discrepancy
            # should be okay.
>           assert len(diff[diff > 1e-2]) / X_search.shape[0] < 0.06
E           assert (8331690 / 499899) < 0.06
E            +  where 8331690 = len(array([0.31385932, 0.32012013, 0.32082888, ..., 0.32249511, 0.32323558,\n 0.32362939]))

test_nearest_neighbors.py:159: AssertionError
===================================================================== warnings summary =====================================================================
../../../../../opt/conda/envs/rapids/lib/python3.7/importlib/_bootstrap.py:219
../../../../../opt/conda/envs/rapids/lib/python3.7/importlib/_bootstrap.py:219
  /opt/conda/envs/rapids/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject
    return f(*args, **kwds)

../../../../../opt/conda/envs/rapids/lib/python3.7/site-packages/sklearn/utils/deprecation.py:143
  /opt/conda/envs/rapids/lib/python3.7/site-packages/sklearn/utils/deprecation.py:143: FutureWarning: The sklearn.datasets.samples_generator module is deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.datasets. Anything that cannot be imported from sklearn.datasets is now part of the private API.
    warnings.warn(message, FutureWarning)

-- Docs: https://docs.pytest.org/en/stable/warnings.html
================================================================= short test summary info ==================================================================
FAILED test_nearest_neighbors.py::test_knn_separate_index_search[braycurtis-50-1000-500000-ndarray] - assert (8331690 / 499899) < 0.06
======================================================== 1 failed, 3 warnings in 108.51s (0:01:48) =========================================================