Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Python tests. #3897

Merged
merged 9 commits into from
Nov 15, 2018
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,11 @@ def buildPlatformCmake(buildName, conf, nodeReq, dockerTarget) {
# Test the wheel for compatibility on a barebones CPU container
${dockerRun} release ${dockerArgs} bash -c " \
pip install --user python-package/dist/xgboost-*-none-any.whl && \
python -m nose -v tests/python"
pytest -v --fulltrace -s tests/python"
# Test the wheel for compatibility on CUDA 10.0 container
${dockerRun} gpu --build-arg CUDA_VERSION=10.0 bash -c " \
pip install --user python-package/dist/xgboost-*-none-any.whl && \
python -m nose -v --eval-attr='(not slow) and (not mgpu)' tests/python-gpu"
pytest -v -s --fulltrace -m '(not mgpu) and (not slow)' tests/python-gpu"
trivialfis marked this conversation as resolved.
Show resolved Hide resolved
"""
}
}
Expand Down
4 changes: 2 additions & 2 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ install:
- set DO_PYTHON=off
- if /i "%target%" == "mingw" set DO_PYTHON=on
- if /i "%target%_%ver%_%configuration%" == "msvc_2015_Release" set DO_PYTHON=on
- if /i "%DO_PYTHON%" == "on" conda install -y numpy scipy pandas matplotlib nose scikit-learn graphviz python-graphviz
- if /i "%DO_PYTHON%" == "on" conda install -y numpy scipy pandas matplotlib pytest scikit-learn graphviz python-graphviz
# R: based on https://github.com/krlmlr/r-appveyor
- ps: |
if($env:target -eq 'rmingw' -or $env:target -eq 'rmsvc') {
Expand Down Expand Up @@ -96,7 +96,7 @@ build_script:

test_script:
- cd %APPVEYOR_BUILD_FOLDER%
- if /i "%DO_PYTHON%" == "on" python -m nose tests/python
- if /i "%DO_PYTHON%" == "on" python -m pytest tests/python
# mingw R package: run the R check (which includes unit tests), and also keep the built binary package
- if /i "%target%" == "rmingw" (
set _R_CHECK_CRAN_INCOMING_=FALSE&&
Expand Down
2 changes: 1 addition & 1 deletion tests/ci_build/Dockerfile.gpu
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ ENV CPP=/opt/rh/devtoolset-2/root/usr/bin/cpp

# Install Python packages
RUN \
pip install numpy nose scipy scikit-learn wheel
pip install numpy pytest scipy scikit-learn wheel

ENV GOSU_VERSION 1.10

Expand Down
4 changes: 2 additions & 2 deletions tests/ci_build/Dockerfile.release
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ ENV PATH=/opt/python/bin:$PATH

# Install Python packages
RUN \
conda install numpy scipy pandas matplotlib nose scikit-learn && \
pip install nose wheel auditwheel graphviz
conda install numpy scipy pandas matplotlib pytest scikit-learn && \
pip install pytest wheel auditwheel graphviz

ENV GOSU_VERSION 1.10

Expand Down
3 changes: 1 addition & 2 deletions tests/ci_build/test_gpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,5 @@ set -e
cd python-package
python setup.py install --user
cd ..
python -m nose -v --eval-attr='(not slow) and (not mgpu)' tests/python-gpu/
pytest -v -s --fulltrace -m "(not mgpu) and (not slow)" tests/python-gpu
./testxgboost --gtest_filter=-*.MGPU_*

2 changes: 1 addition & 1 deletion tests/ci_build/test_mgpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ set -e
cd python-package
python setup.py install --user
cd ..
python -m nose -v --eval-attr='(not slow) and mgpu' tests/python-gpu/
pytest -v -s --fulltrace -m "(not slow) and mgpu" tests/python-gpu
./testxgboost --gtest_filter=*.MGPU_*
7 changes: 4 additions & 3 deletions tests/python-gpu/test_gpu_linear.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
import sys
import pytest
import unittest

sys.path.append('tests/python/')
import test_linear
import testing as tm
import unittest


class TestGPULinear(unittest.TestCase):

datasets = ["Boston", "Digits", "Cancer", "Sparse regression",
"Boston External Memory"]


@pytest.mark.skipif(**tm.no_sklearn())
def test_gpu_coordinate(self):
tm._skip_if_no_sklearn()
variable_param = {
'booster': ['gblinear'],
'updater': ['coord_descent'],
Expand Down
35 changes: 21 additions & 14 deletions tests/python-gpu/test_gpu_prediction.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
from __future__ import print_function

import numpy as np
import sys
import unittest
import xgboost as xgb
from nose.plugins.attrib import attr
import pytest

rng = np.random.RandomState(1994)


@attr('gpu')
@pytest.mark.gpu
class TestGPUPredict(unittest.TestCase):
def test_predict(self):
iterations = 10
Expand All @@ -18,17 +17,21 @@ def test_predict(self):
test_num_cols = [10, 50, 500]
for num_rows in test_num_rows:
for num_cols in test_num_cols:
dtrain = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2))
dval = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2))
dtest = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2))
dtrain = xgb.DMatrix(np.random.randn(num_rows, num_cols),
label=[0, 1] * int(num_rows / 2))
dval = xgb.DMatrix(np.random.randn(num_rows, num_cols),
label=[0, 1] * int(num_rows / 2))
dtest = xgb.DMatrix(np.random.randn(num_rows, num_cols),
label=[0, 1] * int(num_rows / 2))
watchlist = [(dtrain, 'train'), (dval, 'validation')]
res = {}
param = {
"objective": "binary:logistic",
"predictor": "gpu_predictor",
'eval_metric': 'auc',
}
bst = xgb.train(param, dtrain, iterations, evals=watchlist, evals_result=res)
bst = xgb.train(param, dtrain, iterations, evals=watchlist,
evals_result=res)
assert self.non_decreasing(res["train"]["auc"])
gpu_pred_train = bst.predict(dtrain, output_margin=True)
gpu_pred_test = bst.predict(dtest, output_margin=True)
Expand All @@ -39,21 +42,26 @@ def test_predict(self):
cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True)
cpu_pred_test = bst_cpu.predict(dtest, output_margin=True)
cpu_pred_val = bst_cpu.predict(dval, output_margin=True)
np.testing.assert_allclose(cpu_pred_train, gpu_pred_train, rtol=1e-5)
np.testing.assert_allclose(cpu_pred_val, gpu_pred_val, rtol=1e-5)
np.testing.assert_allclose(cpu_pred_test, gpu_pred_test, rtol=1e-5)
np.testing.assert_allclose(cpu_pred_train, gpu_pred_train,
rtol=1e-5)
np.testing.assert_allclose(cpu_pred_val, gpu_pred_val,
rtol=1e-5)
np.testing.assert_allclose(cpu_pred_test, gpu_pred_test,
rtol=1e-5)

def non_decreasing(self, L):
return all((x - y) < 0.001 for x, y in zip(L, L[1:]))

# Test case for a bug where multiple batch predictions made on a test set produce incorrect results
# Test case for a bug where multiple batch predictions made on a
# test set produce incorrect results
def test_multi_predict(self):
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

n = 1000
X, y = make_regression(n, random_state=rng)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)
X_train, X_test, y_train, y_test = train_test_split(X, y,
random_state=123)
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test)

Expand Down Expand Up @@ -85,8 +93,7 @@ def test_sklearn(self):
params = {'tree_method': 'gpu_hist',
'predictor': 'cpu_predictor',
'n_jobs': -1,
'seed': 123
}
'seed': 123}
m = xgb.XGBRegressor(**params).fit(X_train, y_train)
cpu_train_score = m.score(X_train, y_train)
cpu_test_score = m.score(X_test, y_test)
Expand Down
7 changes: 3 additions & 4 deletions tests/python-gpu/test_gpu_updaters.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import numpy as np
import sys
import unittest
from nose.plugins.attrib import attr
import pytest

sys.path.append("tests/python")
import xgboost as xgb
from regression_test_utilities import run_suite, parameter_combinations, \
assert_results_non_increasing

Expand Down Expand Up @@ -45,7 +44,7 @@ def test_gpu_hist(self):
cpu_results = run_suite(param, select_datasets=datasets)
assert_gpu_results(cpu_results, gpu_results)

@attr('mgpu')
@pytest.mark.mgpu
def test_gpu_hist_mgpu(self):
variable_param = {'n_gpus': [-1], 'max_depth': [2, 10],
'max_leaves': [255, 4],
Expand All @@ -56,7 +55,7 @@ def test_gpu_hist_mgpu(self):
gpu_results = run_suite(param, select_datasets=datasets)
assert_results_non_increasing(gpu_results, 1e-2)

@attr('mgpu')
@pytest.mark.mgpu
def test_specified_gpu_id_gpu_update(self):
variable_param = {'n_gpus': [1],
'gpu_id': [1],
Expand Down
19 changes: 7 additions & 12 deletions tests/python-gpu/test_large_sizes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

import sys
import time
import pytest

sys.path.append("../../tests/python")
import xgboost as xgb
import numpy as np
import unittest
from nose.plugins.attrib import attr


def eprint(*args, **kwargs):
Expand All @@ -16,9 +16,11 @@ def eprint(*args, **kwargs):
print(*args, file=sys.stdout, **kwargs)
sys.stdout.flush()


rng = np.random.RandomState(1994)

# "realistic" size based upon http://stat-computing.org/dataexpo/2009/ , which has been processed to one-hot encode categoricalsxsy
# "realistic" size based upon http://stat-computing.org/dataexpo/2009/
# which has been processed to one-hot encode categoricals
cols = 31
# reduced to fit onto 1 gpu but still be large
rows3 = 5000 # small
Expand All @@ -28,7 +30,7 @@ def eprint(*args, **kwargs):
rowslist = [rows1, rows2, rows3]


@attr('slow')
@pytest.mark.slow
class TestGPU(unittest.TestCase):
def test_large(self):
for rows in rowslist:
Expand All @@ -47,15 +49,8 @@ def test_large(self):
max_depth = 6
max_bin = 1024

# regression test --- hist must be same as exact on all-categorial data
ag_param = {'max_depth': max_depth,
'tree_method': 'exact',
'nthread': 0,
'eta': 1,
'silent': 0,
'debug_verbose': 5,
'objective': 'binary:logistic',
'eval_metric': 'auc'}
# regression test --- hist must be same as exact on
# all-categorical data
ag_paramb = {'max_depth': max_depth,
'tree_method': 'hist',
'nthread': 0,
Expand Down
8 changes: 5 additions & 3 deletions tests/python-gpu/test_monotonic_constraints.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from __future__ import print_function

import numpy as np
from sklearn.datasets import make_regression

import unittest
import pytest

import xgboost as xgb
from nose.plugins.attrib import attr
from sklearn.datasets import make_regression

rng = np.random.RandomState(1994)

Expand Down Expand Up @@ -33,7 +35,7 @@ def assert_constraint(constraint, tree_method):
assert non_increasing(pred)


@attr('gpu')
@pytest.mark.gpu
class TestMonotonicConstraints(unittest.TestCase):
def test_exact(self):
assert_constraint(1, 'exact')
Expand Down
36 changes: 25 additions & 11 deletions tests/python/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ def captured_output():
"""
Reassign stdout temporarily in order to test printed statements
Taken from: https://stackoverflow.com/questions/4219717/how-to-assert-output-with-nosetest-unittest-in-python

Also works for pytest.
"""
new_out, new_err = StringIO(), StringIO()
old_out, old_err = sys.stdout, sys.stderr
Expand All @@ -36,15 +38,17 @@ class TestBasic(unittest.TestCase):
def test_basic(self):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
param = {'max_depth': 2, 'eta': 1, 'silent': 1,
'objective': 'binary:logistic'}
# specify validations set to watch performance
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 2
bst = xgb.train(param, dtrain, num_round, watchlist)
# this is prediction
preds = bst.predict(dtest)
labels = dtest.get_label()
err = sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
err = sum(1 for i in range(len(preds))
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
# error must be smaller than 10%
assert err < 0.1

Expand All @@ -62,7 +66,8 @@ def test_basic(self):
def test_record_results(self):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
param = {'max_depth': 2, 'eta': 1, 'silent': 1,
'objective': 'binary:logistic'}
# specify validations set to watch performance
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 2
Expand All @@ -86,7 +91,8 @@ def test_multiclass(self):
# this is prediction
preds = bst.predict(dtest)
labels = dtest.get_label()
err = sum(1 for i in range(len(preds)) if preds[i] != labels[i]) / float(len(preds))
err = sum(1 for i in range(len(preds))
if preds[i] != labels[i]) / float(len(preds))
# error must be smaller than 10%
assert err < 0.1

Expand Down Expand Up @@ -248,7 +254,8 @@ def test_dmatrix_numpy_init(self):

def test_cv(self):
dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
params = {'max_depth': 2, 'eta': 1, 'silent': 1,
'objective': 'binary:logistic'}

# return np.ndarray
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=False)
Expand All @@ -257,29 +264,34 @@ def test_cv(self):

def test_cv_no_shuffle(self):
dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
params = {'max_depth': 2, 'eta': 1, 'silent': 1,
'objective': 'binary:logistic'}

# return np.ndarray
cv = xgb.cv(params, dm, num_boost_round=10, shuffle=False, nfold=10, as_pandas=False)
cv = xgb.cv(params, dm, num_boost_round=10, shuffle=False, nfold=10,
as_pandas=False)
assert isinstance(cv, dict)
assert len(cv) == (4)

def test_cv_explicit_fold_indices(self):
dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective':
'binary:logistic'}
folds = [
# Train Test
([1, 3], [5, 8]),
([7, 9], [23, 43]),
]

# return np.ndarray
cv = xgb.cv(params, dm, num_boost_round=10, folds=folds, as_pandas=False)
cv = xgb.cv(params, dm, num_boost_round=10, folds=folds,
as_pandas=False)
assert isinstance(cv, dict)
assert len(cv) == (4)

def test_cv_explicit_fold_indices_labels(self):
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'reg:linear'}
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective':
'reg:linear'}
N = 100
F = 3
dm = xgb.DMatrix(data=np.random.randn(N, F), label=np.arange(N))
Expand All @@ -300,7 +312,9 @@ def cb(cbackenv):
as_pandas=False
)
output = out.getvalue().strip()
assert output == '[array([5., 8.], dtype=float32), array([23., 43., 11.], dtype=float32)]'
solution = ('[array([5., 8.], dtype=float32), array([23., 43., 11.],' +
' dtype=float32)]')
assert output == solution

def test_get_info(self):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
Expand Down
Loading