From 3737d2610bb6466babc5fe623eb8444d44dc0ceb Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Sun, 21 May 2017 21:04:19 +0200 Subject: [PATCH] dl tutorial files to tmp directory, then move them once successful (#1393) * dl tutorial files to tmp directory, then move them once successful closes #1392 * redo attempt at tutorial checking to use md5 checksums instead. depends on pydata/xarray-data#9 * rm extraneous import * update md5 function name * update whats-new.rst * fix issue link in whats-new * adding tutorial dataset test, adds conditional --run-network-tests flag to pytest cli * one suppress block per file --- .travis.yml | 4 ++-- conftest.py | 2 ++ doc/whats-new.rst | 9 +++++++++ xarray/tests/__init__.py | 8 ++++++++ xarray/tests/test_tutorial.py | 12 ++++++++---- xarray/tutorial.py | 30 ++++++++++++++++++++++++++++-- 6 files changed, 57 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8799944f3cf..868882657f6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,7 +23,7 @@ matrix: - python: 3.6 env: - CONDA_ENV=py36 - - EXTRA_FLAGS="--run-flaky" + - EXTRA_FLAGS="--run-flaky --run-network-tests" - python: 3.6 env: CONDA_ENV=py36-pydap - python: 3.6 @@ -45,7 +45,7 @@ matrix: - python: 3.6 env: - CONDA_ENV=py36 - - EXTRA_FLAGS="--run-flaky" + - EXTRA_FLAGS="--run-flaky --run-network-tests" - python: 3.6 env: CONDA_ENV=py36-pydap - python: 3.6 diff --git a/conftest.py b/conftest.py index ac9d2731df7..d7f4e0c89bc 100644 --- a/conftest.py +++ b/conftest.py @@ -5,3 +5,5 @@ def pytest_addoption(parser): """Add command-line flags for pytest.""" parser.addoption("--run-flaky", action="store_true", help="runs flaky tests") + parser.addoption("--run-network-tests", action="store_true", + help="runs tests requiring a network connection") diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7eccecf541e..d24f25165e5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,6 +30,11 @@ Enhancements By `Chun-Wei Yuan `_ and `Kyle Heuton `_. +- Enhanced tests suite by use of ``@network`` decorator, which is + controlled via ``--run-network-tests`` command line argument + to ``py.test`` (:issue:`1393`). + By `Matthew Gidden `_. + Bug fixes ~~~~~~~~~ @@ -40,6 +45,10 @@ By `Ryan Abernathey `_. ``data_vars``. By `Keisuke Fujii `_. +- Tutorial datasets are now checked against a reference MD5 sum to confirm + successful download (:issue:`1392`). By `Matthew Gidden + `_. + .. _whats-new.0.9.5: v0.9.5 (17 April, 2017) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 95af04b9e40..06a227bd2e3 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -100,14 +100,20 @@ try: _SKIP_FLAKY = not pytest.config.getoption("--run-flaky") + _SKIP_NETWORK_TESTS = not pytest.config.getoption("--run-network-tests") except ValueError: # Can't get config from pytest, e.g., because xarray is installed instead # of being run from a development version (and hence conftests.py is not # available). Don't run flaky tests. _SKIP_FLAKY = True + _SKIP_NETWORK_TESTS = True flaky = pytest.mark.skipif( _SKIP_FLAKY, reason="set --run-flaky option to run flaky tests") +network = pytest.mark.skipif( + _SKIP_NETWORK_TESTS, + reason="set --run-network-tests option to run tests requiring an " + "internet connection") class TestCase(unittest.TestCase): @@ -173,6 +179,7 @@ class UnexpectedDataAccess(Exception): class InaccessibleArray(utils.NDArrayMixin): + def __init__(self, array): self.array = array @@ -181,6 +188,7 @@ def __getitem__(self, key): class ReturnItem(object): + def __getitem__(self, key): return key diff --git a/xarray/tests/test_tutorial.py b/xarray/tests/test_tutorial.py index f120d3d0746..56bdccedcfe 100644 --- a/xarray/tests/test_tutorial.py +++ b/xarray/tests/test_tutorial.py @@ -1,23 +1,27 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function + import os +import pytest from xarray import tutorial, DataArray from xarray.core.pycompat import suppress -from . import TestCase, unittest +from . import TestCase, network -@unittest.skip('TODO: make this conditional on network availability') -class Test_load_dataset(TestCase): +@network +class TestLoadDataset(TestCase): def setUp(self): self.testfile = 'tiny' self.testfilepath = os.path.expanduser(os.sep.join( ('~', '.xarray_tutorial_data', self.testfile))) with suppress(OSError): - os.remove(self.testfilepath) + os.remove('{}.nc'.format(self.testfilepath)) + with suppress(OSError): + os.remove('{}.md5'.format(self.testfilepath)) def test_download_from_github(self): ds = tutorial.load_dataset(self.testfile) diff --git a/xarray/tutorial.py b/xarray/tutorial.py index 858f08d77e9..d7da63a328e 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -9,6 +9,8 @@ from __future__ import division from __future__ import print_function +import hashlib + import os as _os from .backends.api import open_dataset as _open_dataset @@ -18,9 +20,17 @@ _default_cache_dir = _os.sep.join(('~', '.xarray_tutorial_data')) +def file_md5_checksum(fname): + hash_md5 = hashlib.md5() + with open(fname, "rb") as f: + hash_md5.update(f.read()) + return hash_md5.hexdigest() + + # idea borrowed from Seaborn def load_dataset(name, cache=True, cache_dir=_default_cache_dir, - github_url='https://github.com/pydata/xarray-data', **kws): + github_url='https://github.com/pydata/xarray-data', + branch='master', **kws): """ Load a dataset from the online repository (requires internet). @@ -37,6 +47,8 @@ def load_dataset(name, cache=True, cache_dir=_default_cache_dir, If True, then cache data locally for use on subsequent calls github_url : string Github repository where the data is stored + branch : string + The git branch to download from kws : dict, optional Passed to xarray.open_dataset @@ -44,6 +56,8 @@ def load_dataset(name, cache=True, cache_dir=_default_cache_dir, longdir = _os.path.expanduser(cache_dir) fullname = name + '.nc' localfile = _os.sep.join((longdir, fullname)) + md5name = name + '.md5' + md5file = _os.sep.join((longdir, md5name)) if not _os.path.exists(localfile): @@ -52,8 +66,20 @@ def load_dataset(name, cache=True, cache_dir=_default_cache_dir, if not _os.path.isdir(longdir): _os.mkdir(longdir) - url = '/'.join((github_url, 'raw', 'master', fullname)) + url = '/'.join((github_url, 'raw', branch, fullname)) _urlretrieve(url, localfile) + url = '/'.join((github_url, 'raw', branch, md5name)) + _urlretrieve(url, md5file) + + localmd5 = file_md5_checksum(localfile) + with open(md5file, 'r') as f: + remotemd5 = f.read() + if localmd5 != remotemd5: + _os.remove(localfile) + msg = """ + MD5 checksum does not match, try downloading dataset again. + """ + raise IOError(msg) ds = _open_dataset(localfile, **kws).load()