test_fil.py =================================== FAILURES =================================== ___________________ test_fil_classification[90-1000-500000] ____________________ n_rows = 500000, n_columns = 1000, num_rounds = 90 tmp_path = PosixPath('/tmp/pytest-of-rapids/pytest-0/test_fil_classification_90_1000') @pytest.mark.parametrize('n_rows', [unit_param(1000), quality_param(10000), stress_param(500000)]) @pytest.mark.parametrize('n_columns', [unit_param(20), quality_param(100), stress_param(1000)]) @pytest.mark.parametrize('num_rounds', [unit_param(1), unit_param(5), quality_param(50), stress_param(90)]) @pytest.mark.skipif(has_xgboost() is False, reason="need to install xgboost") def test_fil_classification(n_rows, n_columns, num_rounds, tmp_path): # settings classification = True # change this to false to use regression n_rows = n_rows # we'll use 1 millions rows n_columns = n_columns n_categories = 2 random_state = np.random.RandomState(43210) X, y = simulate_data(n_rows, n_columns, n_categories, random_state=random_state, classification=classification) # identify shape and indices n_rows, n_columns = X.shape train_size = 0.80 X_train, X_validation, y_train, y_validation = train_test_split( X, y, train_size=train_size, random_state=0) model_path = os.path.join(tmp_path, 'xgb_class.model') bst = _build_and_save_xgboost(model_path, X_train, y_train, num_rounds=num_rounds, classification=classification) dvalidation = xgb.DMatrix(X_validation, label=y_validation) xgb_preds = bst.predict(dvalidation) xgb_preds_int = np.around(xgb_preds) xgb_proba = np.stack([1-xgb_preds, xgb_preds], axis=1) xgb_acc = accuracy_score(y_validation, xgb_preds > 0.5) fm = ForestInference.load(model_path, algo='auto', output_class=True, threshold=0.50) fil_preds = np.asarray(fm.predict(X_validation)) fil_preds = np.reshape(fil_preds, np.shape(xgb_preds_int)) fil_proba = np.asarray(fm.predict_proba(X_validation)) fil_proba = np.reshape(fil_proba, np.shape(xgb_proba)) fil_acc = 
accuracy_score(y_validation, fil_preds) assert fil_acc == pytest.approx(xgb_acc, abs=0.01) assert array_equal(fil_preds, xgb_preds_int) > assert np.allclose(fil_proba, xgb_proba, 1e-3) E assert False E + where False = (array([[2.4144053e-02, 9.7585595e-01],\n [9.9969733e-01, 3.0264948e-04],\n [9.9668568e-01, 3.3143184e-03],\n ...2073e-02, 9.8477793e-01],\n [9.9980170e-01, 1.9833411e-04],\n [8.8951540e-01, 1.1048459e-01]], dtype=float32), array([[2.41440535e-02, 9.75855947e-01],\n [9.99697328e-01, 3.02650064e-04],\n [9.96685684e-01, 3.31431208e-...-02, 9.84777927e-01],\n [9.99801695e-01, 1.98333917e-04],\n [8.89515460e-01, 1.10484555e-01]], dtype=float32), 0.001) E + where = np.allclose test_fil.py:138: AssertionError ----------------------------- Captured stdout call ----------------------------- [18:31:40] WARNING: /rapids/xgboost/src/learner.cc:529: Parameters: { silent } might not be used. This may not be accurate due to some parameters are only used in language bindings but passed down to XGBoost core. Or some parameters are not used but slip through this verification. Please open an issue if you find above cases. 
____________________ test_fil_regression[11-90-1000-500000] ____________________ n_rows = 500000, n_columns = 1000, num_rounds = 90 tmp_path = PosixPath('/tmp/pytest-of-rapids/pytest-0/test_fil_regression_11_90_10000') max_depth = 11 @pytest.mark.parametrize('n_rows', [unit_param(1000), quality_param(10000), stress_param(500000)]) @pytest.mark.parametrize('n_columns', [unit_param(20), quality_param(100), stress_param(1000)]) @pytest.mark.parametrize('num_rounds', [unit_param(5), quality_param(10), stress_param(90)]) @pytest.mark.parametrize('max_depth', [unit_param(3), unit_param(7), stress_param(11)]) @pytest.mark.skipif(has_xgboost() is False, reason="need to install xgboost") def test_fil_regression(n_rows, n_columns, num_rounds, tmp_path, max_depth): # settings classification = False # change this to false to use regression n_rows = n_rows # we'll use 1 millions rows n_columns = n_columns random_state = np.random.RandomState(43210) X, y = simulate_data(n_rows, n_columns, random_state=random_state, classification=classification, bias=10.0) # identify shape and indices n_rows, n_columns = X.shape train_size = 0.80 X_train, X_validation, y_train, y_validation = train_test_split( X, y, train_size=train_size, random_state=0) model_path = os.path.join(tmp_path, 'xgb_reg.model') bst = _build_and_save_xgboost(model_path, X_train, y_train, classification=classification, num_rounds=num_rounds, xgboost_params={'max_depth': max_depth}) dvalidation = xgb.DMatrix(X_validation, label=y_validation) xgb_preds = bst.predict(dvalidation) xgb_mse = mean_squared_error(y_validation, xgb_preds) fm = ForestInference.load(model_path, algo='auto', output_class=False) fil_preds = np.asarray(fm.predict(X_validation)) fil_preds = np.reshape(fil_preds, np.shape(xgb_preds)) fil_mse = mean_squared_error(y_validation, fil_preds) assert fil_mse == pytest.approx(xgb_mse, abs=0.01) > assert np.allclose(fil_preds, xgb_preds, 1e-3) E assert False E + where False = (array([ -431.23352 ,
-1276.0767 , -18.129177, ..., 680.99805 ,\n 500.81107 , -255.79837 ], dtype=float32), array([ -431.2336 , -1276.0767 , -18.129166, ..., 680.998 ,\n 500.81082 , -255.79826 ], dtype=float32), 0.001) E + where = np.allclose test_fil.py:187: AssertionError ----------------------------- Captured stdout call ----------------------------- [18:43:21] WARNING: /rapids/xgboost/src/learner.cc:529: Parameters: { silent } might not be used. This may not be accurate due to some parameters are only used in language bindings but passed down to XGBoost core. Or some parameters are not used but slip through this verification. Please open an issue if you find above cases. =============================== warnings summary =============================== ../../../../../opt/conda/envs/rapids/lib/python3.7/importlib/_bootstrap.py:219 ../../../../../opt/conda/envs/rapids/lib/python3.7/importlib/_bootstrap.py:219 /opt/conda/envs/rapids/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject return f(*args, **kwds) -- Docs: https://docs.pytest.org/en/stable/warnings.html =========================== short test summary info ============================ FAILED test_fil.py::test_fil_classification[90-1000-500000] - assert False FAILED test_fil.py::test_fil_regression[11-90-1000-500000] - assert False ====== 2 failed, 66 passed, 115 skipped, 2 warnings in 1234.83s (0:20:34) ======