Skip to content

Commit

Permalink
dl tutorial files to tmp directory, then move them once successful (#…
Browse files Browse the repository at this point in the history
…1393)

* dl tutorial files to tmp directory, then move them once successful

closes #1392

* redo attempt at tutorial checking to use md5 checksums instead. depends on pydata/xarray-data#9

* rm extraneous import

* update md5 function name

* update whats-new.rst

* fix issue link in whats-new

* adding tutorial dataset test, adds conditional --run-network-tests flag to pytest cli

* one suppress block per file
  • Loading branch information
gidden authored and shoyer committed May 21, 2017
1 parent 5f92955 commit 3737d26
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 8 deletions.
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ matrix:
- python: 3.6
env:
- CONDA_ENV=py36
- EXTRA_FLAGS="--run-flaky"
- EXTRA_FLAGS="--run-flaky --run-network-tests"
- python: 3.6
env: CONDA_ENV=py36-pydap
- python: 3.6
Expand All @@ -45,7 +45,7 @@ matrix:
- python: 3.6
env:
- CONDA_ENV=py36
- EXTRA_FLAGS="--run-flaky"
- EXTRA_FLAGS="--run-flaky --run-network-tests"
- python: 3.6
env: CONDA_ENV=py36-pydap
- python: 3.6
Expand Down
2 changes: 2 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ def pytest_addoption(parser):
"""Add command-line flags for pytest."""
parser.addoption("--run-flaky", action="store_true",
help="runs flaky tests")
parser.addoption("--run-network-tests", action="store_true",
help="runs tests requiring a network connection")
9 changes: 9 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ Enhancements
By `Chun-Wei Yuan <https://github.com/chunweiyuan>`_ and
`Kyle Heuton <https://github.com/kheuton>`_.

- Enhanced the test suite with a ``@network`` decorator, which is
controlled via the ``--run-network-tests`` command line argument
to ``py.test`` (:issue:`1393`).
By `Matthew Gidden <https://github.com/gidden>`_.

Bug fixes
~~~~~~~~~

Expand All @@ -40,6 +45,10 @@ By `Ryan Abernathey <https://github.com/rabernat>`_.
``data_vars``.
By `Keisuke Fujii <https://github.com/fujiisoup>`_.

- Tutorial datasets are now checked against a reference MD5 sum to confirm
successful download (:issue:`1392`). By `Matthew Gidden
<https://github.com/gidden>`_.

.. _whats-new.0.9.5:

v0.9.5 (17 April, 2017)
Expand Down
8 changes: 8 additions & 0 deletions xarray/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,20 @@

try:
_SKIP_FLAKY = not pytest.config.getoption("--run-flaky")
_SKIP_NETWORK_TESTS = not pytest.config.getoption("--run-network-tests")
except ValueError:
# Can't get config from pytest, e.g., because xarray is installed instead
# of being run from a development version (and hence conftest.py is not
# available). Don't run flaky or network tests.
_SKIP_FLAKY = True
_SKIP_NETWORK_TESTS = True

flaky = pytest.mark.skipif(
_SKIP_FLAKY, reason="set --run-flaky option to run flaky tests")
network = pytest.mark.skipif(
_SKIP_NETWORK_TESTS,
reason="set --run-network-tests option to run tests requiring an "
"internet connection")


class TestCase(unittest.TestCase):
Expand Down Expand Up @@ -173,6 +179,7 @@ class UnexpectedDataAccess(Exception):


class InaccessibleArray(utils.NDArrayMixin):

def __init__(self, array):
self.array = array

Expand All @@ -181,6 +188,7 @@ def __getitem__(self, key):


class ReturnItem(object):

def __getitem__(self, key):
return key

Expand Down
12 changes: 8 additions & 4 deletions xarray/tests/test_tutorial.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,27 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import pytest

from xarray import tutorial, DataArray
from xarray.core.pycompat import suppress

from . import TestCase, unittest
from . import TestCase, network


@unittest.skip('TODO: make this conditional on network availability')
class Test_load_dataset(TestCase):
@network
class TestLoadDataset(TestCase):

def setUp(self):
self.testfile = 'tiny'
self.testfilepath = os.path.expanduser(os.sep.join(
('~', '.xarray_tutorial_data', self.testfile)))
with suppress(OSError):
os.remove(self.testfilepath)
os.remove('{}.nc'.format(self.testfilepath))
with suppress(OSError):
os.remove('{}.md5'.format(self.testfilepath))

def test_download_from_github(self):
ds = tutorial.load_dataset(self.testfile)
Expand Down
30 changes: 28 additions & 2 deletions xarray/tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from __future__ import division
from __future__ import print_function

import hashlib

import os as _os

from .backends.api import open_dataset as _open_dataset
Expand All @@ -18,9 +20,17 @@
_default_cache_dir = _os.sep.join(('~', '.xarray_tutorial_data'))


def file_md5_checksum(fname, chunk_size=65536):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is read in fixed-size chunks so that large files are hashed
    without loading their whole contents into memory at once.

    Parameters
    ----------
    fname : str
        Path of the file to hash. It is opened in binary mode.
    chunk_size : int, optional
        Number of bytes read per iteration. Must be positive.

    Returns
    -------
    str
        The MD5 digest as a string of 32 hexadecimal digits.
    """
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        # Two-argument iter() keeps calling f.read() until it returns the
        # empty bytes sentinel, which signals end of file.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()


# idea borrowed from Seaborn
def load_dataset(name, cache=True, cache_dir=_default_cache_dir,
github_url='https://github.com/pydata/xarray-data', **kws):
github_url='https://github.com/pydata/xarray-data',
branch='master', **kws):
"""
Load a dataset from the online repository (requires internet).
Expand All @@ -37,13 +47,17 @@ def load_dataset(name, cache=True, cache_dir=_default_cache_dir,
If True, then cache data locally for use on subsequent calls
github_url : string
Github repository where the data is stored
branch : string
The git branch to download from
kws : dict, optional
Passed to xarray.open_dataset
"""
longdir = _os.path.expanduser(cache_dir)
fullname = name + '.nc'
localfile = _os.sep.join((longdir, fullname))
md5name = name + '.md5'
md5file = _os.sep.join((longdir, md5name))

if not _os.path.exists(localfile):

Expand All @@ -52,8 +66,20 @@ def load_dataset(name, cache=True, cache_dir=_default_cache_dir,
if not _os.path.isdir(longdir):
_os.mkdir(longdir)

url = '/'.join((github_url, 'raw', 'master', fullname))
url = '/'.join((github_url, 'raw', branch, fullname))
_urlretrieve(url, localfile)
url = '/'.join((github_url, 'raw', branch, md5name))
_urlretrieve(url, md5file)

localmd5 = file_md5_checksum(localfile)
with open(md5file, 'r') as f:
remotemd5 = f.read()
if localmd5 != remotemd5:
_os.remove(localfile)
msg = """
MD5 checksum does not match, try downloading dataset again.
"""
raise IOError(msg)

ds = _open_dataset(localfile, **kws).load()

Expand Down

0 comments on commit 3737d26

Please sign in to comment.