From b1dabfe3b36bbacb5a2deeea6856cf81166f7f08 Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Tue, 2 May 2017 06:47:17 -0500 Subject: [PATCH 1/8] dl tutorial files to tmp directory, then move them once successful closes #1392 --- xarray/tutorial.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/xarray/tutorial.py b/xarray/tutorial.py index 858f08d77e9..b4e606c0ee3 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -10,6 +10,7 @@ from __future__ import print_function import os as _os +import shutil as _shutil from .backends.api import open_dataset as _open_dataset from .core.pycompat import urlretrieve as _urlretrieve @@ -42,8 +43,10 @@ def load_dataset(name, cache=True, cache_dir=_default_cache_dir, """ longdir = _os.path.expanduser(cache_dir) + tmpdir = _os.sep.join((longdir, '.tmp')) fullname = name + '.nc' localfile = _os.sep.join((longdir, fullname)) + tmpfile = _os.sep.join((longdir, fullname)) if not _os.path.exists(localfile): @@ -51,9 +54,16 @@ def load_dataset(name, cache=True, cache_dir=_default_cache_dir, # May want to add an option to remove it. if not _os.path.isdir(longdir): _os.mkdir(longdir) + if not _os.path.isdir(tmpdir): + _os.mkdir(tmpdir) url = '/'.join((github_url, 'raw', 'master', fullname)) - _urlretrieve(url, localfile) + _urlretrieve(url, tmpfile) + + if not _os.path.exists(tmpfile): + raise ValueError('File could not be downloaded, please try again') + + _shutil.move(tmpfile, localfile) ds = _open_dataset(localfile, **kws).load() From 3abf31fb6268634b75aee41e2a4ece1a7bdda8a9 Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Tue, 2 May 2017 20:06:44 +0200 Subject: [PATCH 2/8] redo attempt at tutorial checking to use md5 checksums instead. depends on pydata/xarray-data#9 --- xarray/tutorial.py | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/xarray/tutorial.py b/xarray/tutorial.py index b4e606c0ee3..81a78d3b8ad 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -9,6 +9,8 @@ from __future__ import division from __future__ import print_function +import hashlib + import os as _os import shutil as _shutil @@ -19,9 +21,17 @@ _default_cache_dir = _os.sep.join(('~', '.xarray_tutorial_data')) +def _md5(fname): + hash_md5 = hashlib.md5() + with open(fname, "rb") as f: + hash_md5.update(f.read()) + return hash_md5.hexdigest() + + # idea borrowed from Seaborn def load_dataset(name, cache=True, cache_dir=_default_cache_dir, - github_url='https://github.com/pydata/xarray-data', **kws): + github_url='https://github.com/pydata/xarray-data', + branch='master', **kws): """ Load a dataset from the online repository (requires internet). @@ -38,15 +48,17 @@ def load_dataset(name, cache=True, cache_dir=_default_cache_dir, If True, then cache data locally for use on subsequent calls github_url : string Github repository where the data is stored + branch : string + The git branch to download from kws : dict, optional Passed to xarray.open_dataset """ longdir = _os.path.expanduser(cache_dir) - tmpdir = _os.sep.join((longdir, '.tmp')) fullname = name + '.nc' localfile = _os.sep.join((longdir, fullname)) - tmpfile = _os.sep.join((longdir, fullname)) + md5name = name + '.md5' + md5file = _os.sep.join((longdir, md5name)) if not _os.path.exists(localfile): @@ -54,16 +66,21 @@ def load_dataset(name, cache=True, cache_dir=_default_cache_dir, # May want to add an option to remove it. if not _os.path.isdir(longdir): _os.mkdir(longdir) - if not _os.path.isdir(tmpdir): - _os.mkdir(tmpdir) - - url = '/'.join((github_url, 'raw', 'master', fullname)) - _urlretrieve(url, tmpfile) - - if not _os.path.exists(tmpfile): - raise ValueError('File could not be downloaded, please try again') - _shutil.move(tmpfile, localfile) + url = '/'.join((github_url, 'raw', branch, fullname)) + _urlretrieve(url, localfile) + url = '/'.join((github_url, 'raw', branch, md5name)) + _urlretrieve(url, md5file) + + localmd5 = _md5(localfile) + with open(md5file, 'r') as f: + remotemd5 = f.read() + if localmd5 != remotemd5: + _os.remove(localfile) + msg = """ + MD5 checksum does not match, try downloading dataset again. + """ + raise IOError(msg) ds = _open_dataset(localfile, **kws).load() From 16133b429108fbcfb4923e925689158775466570 Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Tue, 2 May 2017 20:10:08 +0200 Subject: [PATCH 3/8] rm extraneous import --- xarray/tutorial.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tutorial.py b/xarray/tutorial.py index 81a78d3b8ad..b54c4cd5c7d 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -12,7 +12,6 @@ import hashlib import os as _os -import shutil as _shutil from .backends.api import open_dataset as _open_dataset from .core.pycompat import urlretrieve as _urlretrieve From 38d8ec13cbdac942b8d03731266f05df0e4cd3ea Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Wed, 3 May 2017 10:15:00 -0500 Subject: [PATCH 4/8] update md5 function name --- xarray/tutorial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tutorial.py b/xarray/tutorial.py index b54c4cd5c7d..d7da63a328e 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -20,7 +20,7 @@ _default_cache_dir = _os.sep.join(('~', '.xarray_tutorial_data')) -def _md5(fname): +def file_md5_checksum(fname): hash_md5 = hashlib.md5() with open(fname, "rb") as f: hash_md5.update(f.read()) @@ -71,7 +71,7 @@ def load_dataset(name, cache=True, cache_dir=_default_cache_dir, url = '/'.join((github_url, 'raw', branch, md5name)) _urlretrieve(url, md5file) - localmd5 = _md5(localfile) + localmd5 = file_md5_checksum(localfile) with open(md5file, 'r') as f: remotemd5 = f.read() if localmd5 != remotemd5: From 349099a7dc61657f60fa7a783bc53f3f42083a8e Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Wed, 3 May 2017 18:49:46 +0200 Subject: [PATCH 5/8] update whats-new.rst --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7eccecf541e..82db0cd828e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,6 +39,9 @@ By `Ryan Abernathey `_. - Fix a bug where ``.isel_points`` wrongly assigns unselected coordinate to ``data_vars``. By `Keisuke Fujii `_. +- Tutorial datasets are now checked against a reference MD5 sum to confirm + successful download (:issue:`13921). By `Matthew Gidden + `_. .. _whats-new.0.9.5: From 44cb44d1e0804682b914e8198faad4cca522fd00 Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Sun, 21 May 2017 16:57:16 +0200 Subject: [PATCH 6/8] fix issue link in whats-new --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 82db0cd828e..3c109815267 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -40,7 +40,7 @@ By `Ryan Abernathey `_. ``data_vars``. By `Keisuke Fujii `_. - Tutorial datasets are now checked against a reference MD5 sum to confirm - successful download (:issue:`13921). By `Matthew Gidden + successful download (:issue:`1392`). By `Matthew Gidden `_. .. _whats-new.0.9.5: From 400762c0f9c80b093285f2886044b1acb1f7e2c3 Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Sun, 21 May 2017 17:21:33 +0200 Subject: [PATCH 7/8] adding tutorial dataset test, adds conditional --run-network-tests flag to pytest cli --- .travis.yml | 4 ++-- conftest.py | 2 ++ doc/whats-new.rst | 6 ++++++ xarray/tests/__init__.py | 8 ++++++++ xarray/tests/test_tutorial.py | 11 +++++++---- 5 files changed, 25 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8799944f3cf..868882657f6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,7 +23,7 @@ matrix: - python: 3.6 env: - CONDA_ENV=py36 - - EXTRA_FLAGS="--run-flaky" + - EXTRA_FLAGS="--run-flaky --run-network-tests" - python: 3.6 env: CONDA_ENV=py36-pydap - python: 3.6 @@ -45,7 +45,7 @@ matrix: - python: 3.6 env: - CONDA_ENV=py36 - - EXTRA_FLAGS="--run-flaky" + - EXTRA_FLAGS="--run-flaky --run-network-tests" - python: 3.6 env: CONDA_ENV=py36-pydap - python: 3.6 diff --git a/conftest.py b/conftest.py index ac9d2731df7..d7f4e0c89bc 100644 --- a/conftest.py +++ b/conftest.py @@ -5,3 +5,5 @@ def pytest_addoption(parser): """Add command-line flags for pytest.""" parser.addoption("--run-flaky", action="store_true", help="runs flaky tests") + parser.addoption("--run-network-tests", action="store_true", + help="runs tests requiring a network connection") diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3c109815267..d24f25165e5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,6 +30,11 @@ Enhancements By `Chun-Wei Yuan `_ and `Kyle Heuton `_. +- Enhanced tests suite by use of ``@network`` decorator, which is + controlled via ``--run-network-tests`` command line argument + to ``py.test`` (:issue:`1393`). + By `Matthew Gidden `_. + Bug fixes ~~~~~~~~~ @@ -39,6 +44,7 @@ By `Ryan Abernathey `_. - Fix a bug where ``.isel_points`` wrongly assigns unselected coordinate to ``data_vars``. By `Keisuke Fujii `_. + - Tutorial datasets are now checked against a reference MD5 sum to confirm successful download (:issue:`1392`). By `Matthew Gidden `_. diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 95af04b9e40..06a227bd2e3 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -100,14 +100,20 @@ try: _SKIP_FLAKY = not pytest.config.getoption("--run-flaky") + _SKIP_NETWORK_TESTS = not pytest.config.getoption("--run-network-tests") except ValueError: # Can't get config from pytest, e.g., because xarray is installed instead # of being run from a development version (and hence conftests.py is not # available). Don't run flaky tests. _SKIP_FLAKY = True + _SKIP_NETWORK_TESTS = True flaky = pytest.mark.skipif( _SKIP_FLAKY, reason="set --run-flaky option to run flaky tests") +network = pytest.mark.skipif( + _SKIP_NETWORK_TESTS, + reason="set --run-network-tests option to run tests requiring an " + "internet connection") class TestCase(unittest.TestCase): @@ -173,6 +179,7 @@ class UnexpectedDataAccess(Exception): class InaccessibleArray(utils.NDArrayMixin): + def __init__(self, array): self.array = array @@ -181,6 +188,7 @@ def __getitem__(self, key): class ReturnItem(object): + def __getitem__(self, key): return key diff --git a/xarray/tests/test_tutorial.py b/xarray/tests/test_tutorial.py index f120d3d0746..49b05f5cb62 100644 --- a/xarray/tests/test_tutorial.py +++ b/xarray/tests/test_tutorial.py @@ -1,23 +1,26 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function + import os +import pytest from xarray import tutorial, DataArray from xarray.core.pycompat import suppress -from . import TestCase, unittest +from . import TestCase, network -@unittest.skip('TODO: make this conditional on network availability') -class Test_load_dataset(TestCase): +@network +class TestLoadDataset(TestCase): def setUp(self): self.testfile = 'tiny' self.testfilepath = os.path.expanduser(os.sep.join( ('~', '.xarray_tutorial_data', self.testfile))) with suppress(OSError): - os.remove(self.testfilepath) + os.remove('{}.nc'.format(self.testfilepath)) + os.remove('{}.md5'.format(self.testfilepath)) def test_download_from_github(self): ds = tutorial.load_dataset(self.testfile) From f1c8886ac870b55dc1d27ed21c658c12ca609cc0 Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Sun, 21 May 2017 17:48:01 +0200 Subject: [PATCH 8/8] one suppress block per file --- xarray/tests/test_tutorial.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_tutorial.py b/xarray/tests/test_tutorial.py index 49b05f5cb62..56bdccedcfe 100644 --- a/xarray/tests/test_tutorial.py +++ b/xarray/tests/test_tutorial.py @@ -20,6 +20,7 @@ def setUp(self): ('~', '.xarray_tutorial_data', self.testfile))) with suppress(OSError): os.remove('{}.nc'.format(self.testfilepath)) + with suppress(OSError): os.remove('{}.md5'.format(self.testfilepath)) def test_download_from_github(self):