Merge pull request #288 from NeurodataWithoutBorders/use_existing_dat…

…a_files Use data files for various tests
NeurodataWithoutBorders · Oct 12, 2022 · b1abb56 · b1abb56
2 parents 04d0f18 + dcaeef5
commit b1abb56
Show file tree

Hide file tree

Showing 36 changed files with 288 additions and 133 deletions.
diff --git a/.github/workflows/deploy-tests.yml b/.github/workflows/deploy-tests.yml
@@ -10,14 +10,21 @@ jobs:
   run-tests:
     needs: assess-file-changes
     if: ${{ needs.assess-file-changes.outputs.SOURCE_CHANGED == 'true' }}
-    uses: neurodatawithoutborders/nwbinspector/.github/workflows/testing.yml@dev
+    uses: neurodatawithoutborders/nwbinspector/.github/workflows/testing.yml@use_existing_data_files
 
   run-past-gallery:
     needs: assess-file-changes
     if: ${{ needs.assess-file-changes.outputs.SOURCE_CHANGED == 'true' }}
-    uses: neurodatawithoutborders/nwbinspector/.github/workflows/version_gallery.yml@dev
+    uses: neurodatawithoutborders/nwbinspector/.github/workflows/version_gallery.yml@use_existing_data_files
 
   run-dev-gallery:
     needs: assess-file-changes
     if: ${{ needs.assess-file-changes.outputs.SOURCE_CHANGED == 'true' }}
-    uses: neurodatawithoutborders/nwbinspector/.github/workflows/dev-gallery.yml@dev
+    uses: neurodatawithoutborders/nwbinspector/.github/workflows/dev-gallery.yml@use_existing_data_files
+
+  update-testing-files:
+    needs: assess-file-changes
+    if: ${{ needs.assess-file-changes.outputs.TESTING_CHANGED == 'true' }}
+    uses: neurodatawithoutborders/nwbinspector/.github/workflows/update-testing-files.yml@use_existing_data_files
+    secrets:
+      DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }}
diff --git a/.github/workflows/dev-gallery.yml b/.github/workflows/dev-gallery.yml
@@ -3,7 +3,7 @@ on: workflow_call
 
 jobs:
   build-and-test:
-    name: Testing against past PyNWB versions
+    name: Testing against dev PyNWB version
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
@@ -25,8 +25,10 @@ jobs:
           pip install -e .
           pip install git+https://github.com/neurodatawithoutborders/pynwb@dev
           pip install dandi
-      - name: Download testing data
-        run: dandi download 'https://gui-staging.dandiarchive.org/#/dandiset/204919'
+      - name: Download testing data and set config path
+        run: |
+          dandi download "https://gui-staging.dandiarchive.org/#/dandiset/204919"
+          python -c "from nwbinspector.testing import update_testing_config; update_testing_config(key='LOCAL_PATH', value='./204919/testing_files/')"
       - name: Uninstall h5py
         run: pip uninstall -y h5py
       - name: Install ROS3

diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml
@@ -22,16 +22,20 @@ jobs:
         run: |
           pip install pytest
           pip install pytest-cov
+
       - name: Install package
         run: |
           pip install -e .
           pip install dandi
-      - name: Download testing data
-        run: dandi download 'https://gui-staging.dandiarchive.org/#/dandiset/204919'
+      - name: Download testing data and set config path
+        run: |
+          dandi download "https://gui-staging.dandiarchive.org/#/dandiset/204919"
+          python -c "from nwbinspector.testing import update_testing_config; update_testing_config(key='LOCAL_PATH', value='./204919/testing_files/')"
       - name: Uninstall h5py
         run: pip uninstall -y h5py
       - name: Install ROS3
         run: conda install -c conda-forge h5py
+
       - name: Run pytest with coverage
         run: pytest -rsx --cov=./ --cov-report xml:./nwbinspector/nwbinspector/coverage.xml
       - if: ${{ matrix.python-version == '3.10' && matrix.os == 'ubuntu-latest' }}

diff --git a/.github/workflows/update-testing-files.yml b/.github/workflows/update-testing-files.yml
@@ -0,0 +1,37 @@
+name: Generate and Upload Testing Files
+on:
+  workflow_dispatch:
+  workflow_call:
+    secrets:
+      DANDI_API_KEY:
+        required: true
+
+env:
+  DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }}
+
+jobs:
+  build-and-test:
+    name: Generate and Upload Testing Files
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+    steps:
+      - uses: actions/checkout@v2
+      - run: git fetch --prune --unshallow --tags
+      - name: Setup Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+
+      - name: Install DANDI
+        run: pip install -e .[dandi]
+      - name: Update testing configuration path
+        run: python -c "from nwbinspector.testing import update_testing_config; update_testing_config(key='LOCAL_PATH', value='./testing_files/')"
+      - name: Generate testing files
+        run: python -c "from nwbinspector.testing import generate_testing_files; generate_testing_files()"
+      - name: Upload to DANDI
+        run: |
+          dandi download 'https://gui-staging.dandiarchive.org/#/dandiset/204919' --download dandiset.yaml
+          cd 204919
+          mv ../testing_files .
+          dandi upload -i dandi-staging --validation skip  # These are purposefully invalid files
diff --git a/.github/workflows/version_gallery.yml b/.github/workflows/version_gallery.yml
@@ -28,5 +28,11 @@ jobs:
         run: |
           pip install -e .
           pip install pynwb==${{ matrix.pynwb-version }}
+          pip install dandi
+      - name: Download testing data and set config path
+        run: |
+          dandi download "https://gui-staging.dandiarchive.org/#/dandiset/204919"
+          python -c "from nwbinspector.testing import update_testing_config; update_testing_config(key='LOCAL_PATH', value='./204919/testing_files/')"
+
       - name: Run pytest with coverage
         run: pytest -rsx
diff --git a/.gitignore b/.gitignore
@@ -3,6 +3,10 @@
 *.nwb
 manifest.json
 
+# Testing
+tests/testing_config.json
+*/testing_files/*
+
 # Python byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,15 @@
 # Upcoming
 
+### Improvements
+
+* Added PyNWB v2.1.0 specific file generation functions to the `testing` submodule, and altered the tests for `ImageSeries` to use these pre-existing files when available. Also included automated workflow to push the generated files to a DANDI-staging server for public access. [PR #288](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/288)
+
+### Fixes
+
+* Fixed relative path detection for cross-platform strings in `check_image_series_external_file_relative` [PR #288](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/288)
+
+
+
 # v0.4.14
 
 ### Fixes

diff --git a/README.md b/README.md
@@ -6,9 +6,7 @@
 [![codecov](https://codecov.io/gh/NeurodataWithoutBorders/nwbinspector/branch/dev/graphs/badge.svg?branch=dev)](https://codecov.io/github/NeurodataWithoutBorders/nwbinspector?branch=dev)
 [![License](https://img.shields.io/pypi/l/pynwb.svg)](https://github.com/NeurodataWithoutBorders/nwbinspector/license.txt)
 
-Inspect NWB files for compliance with [NWB Best Practices](https://www.nwb.org/best-practices/). This inspector is meant as a companion to the pynwb validator, which checks for strict schema compliance. In contrast, this tool attempts to apply some common sense to find components of the file that are technically compliant, but probably incorrect, or suboptimal, or deviate from best practices. This tool is meant simply as a data review aid. It does not catch all best practice violations, and any warnings it does produce should be checked by a knowledgeable reviewer.
-
-This project is under active development. You may use this as a stand-alone tool, but we do not advise you to code against this project at this time as we do expect the warnings to change as the project develops.
+Inspect NWB files for compliance with [NWB Best Practices](https://nwbinspector.readthedocs.io/en/dev/best_practices/best_practices_index.html). This inspector is meant as a companion to the PyNWB validator, which checks for strict schema compliance. In contrast, this tool attempts to apply some common sense to find components of the file that are technically compliant, but possibly incorrect, suboptimal in their representation, or deviate from best practices. This tool is meant simply as a data review aid. It does not catch all best practice violations, and any warnings it does produce should be checked by a knowledgeable reviewer.
 
 ## Installation
 ```bash
@@ -23,7 +21,4 @@ nwbinspector path/to/my/data.nwb
 
 # supply a path to a directory containing NWB files
 nwbinspector path/to/my/data/dir/
-
-# optional: supply modules to import before reading the file, e.g., for NWB extensions
-nwbinspector path/to/my/data.nwb -m my_extension_module1 my_extension_module2
 ```
diff --git a/base_test_config.json b/base_test_config.json
@@ -0,0 +1,3 @@
+{
+    "LOCAL_PATH": "/shared/catalystneuro/"
+}
diff --git a/nwbinspector/testing.py b/nwbinspector/testing.py
diff --git a/setup.py b/setup.py
@@ -1,25 +1,34 @@
 from setuptools import setup, find_packages
 from pathlib import Path
+from shutil import copy
 
 root = Path(__file__).parent
 with open(root / "README.md", "r") as f:
     long_description = f.read()
 with open(root / "requirements.txt") as f:
     install_requires = f.readlines()
-with open(root / "nwbinspector" / "version.py") as f:
-    exec(f.read())
+with open(root / "src" / "nwbinspector" / "version.py") as f:
+    version = f.read()
+
+# Instantiate the testing configuration file from the base file `base_test_config.json`
+base_test_config = Path.cwd() / "base_test_config.json"
+local_test_config = Path.cwd() / "tests" / "testing_config.json"
+if not local_test_config.exists():
+    copy(src=base_test_config, dst=local_test_config)
 
 setup(
     name="nwbinspector",
-    version=__version__,
+    version=version.split('"')[1],
     description="Tool to inspect NWB files for best practices compliance.",
     long_description=long_description,
     long_description_content_type="text/markdown",
     author="Ryan Ly, Ben Dichter, and Cody Baker.",
     author_email="[email protected], [email protected], [email protected]",
-    packages=find_packages(),
-    include_package_data=True,
-    url="https://github.com/NeurodataWithoutBorders/nwbinspector",
+    url="https://nwbinspector.readthedocs.io/",
+    keywords="nwb",
+    packages=find_packages(where="src"),
+    package_dir={"": "src"},
+    include_package_data=True,  # Includes files described in MANIFEST.in in the installation
     install_requires=install_requires,
     extras_require=dict(dandi=["dandi>=0.39.2"]),
     entry_points={"console_scripts": ["nwbinspector=nwbinspector.nwbinspector:inspect_all_cli"]},

diff --git a/nwbinspector/__init__.py → src/nwbinspector/__init__.py b/nwbinspector/__init__.py → src/nwbinspector/__init__.py
diff --git a/nwbinspector/checks/__init__.py → src/nwbinspector/checks/__init__.py b/nwbinspector/checks/__init__.py → src/nwbinspector/checks/__init__.py
diff --git a/nwbinspector/checks/behavior.py → src/nwbinspector/checks/behavior.py b/nwbinspector/checks/behavior.py → src/nwbinspector/checks/behavior.py
diff --git a/nwbinspector/checks/ecephys.py → src/nwbinspector/checks/ecephys.py b/nwbinspector/checks/ecephys.py → src/nwbinspector/checks/ecephys.py
diff --git a/nwbinspector/checks/general.py → src/nwbinspector/checks/general.py b/nwbinspector/checks/general.py → src/nwbinspector/checks/general.py
diff --git a/nwbinspector/checks/icephys.py → src/nwbinspector/checks/icephys.py b/nwbinspector/checks/icephys.py → src/nwbinspector/checks/icephys.py
diff --git a/nwbinspector/checks/image_series.py → src/nwbinspector/checks/image_series.py b/nwbinspector/checks/image_series.py → src/nwbinspector/checks/image_series.py
@@ -1,4 +1,5 @@
 """Check functions specific to ImageSeries."""
+import ntpath
 from pathlib import Path
 
 from pynwb.image import ImageSeries
@@ -39,7 +40,7 @@ def check_image_series_external_file_relative(image_series: ImageSeries):
         return
     for file_path in image_series.external_file:
         file_path = file_path.decode() if isinstance(file_path, bytes) else file_path
-        if Path(file_path).is_absolute():
+        if ntpath.isabs(file_path):  # ntpath required for cross-platform detection
             yield InspectorMessage(
                 message=(
                     f"The external file '{file_path}' is not a relative path. "
@@ -58,6 +59,4 @@ def check_image_series_data_size(image_series: ImageSeries, gb_lower_bound: floa
     data = image_series.data
     data_size_gb = data.size * data.dtype.itemsize / 1e9
     if data_size_gb > gb_lower_bound:
-        return InspectorMessage(
-            message=f"ImageSeries {image_series.name} is too large. Use external mode for storage",
-        )
+        return InspectorMessage(message=f"ImageSeries {image_series.name} is too large. Use external mode for storage")
diff --git a/nwbinspector/checks/images.py → src/nwbinspector/checks/images.py b/nwbinspector/checks/images.py → src/nwbinspector/checks/images.py
diff --git a/nwbinspector/checks/nwb_containers.py → src/nwbinspector/checks/nwb_containers.py b/nwbinspector/checks/nwb_containers.py → src/nwbinspector/checks/nwb_containers.py
diff --git a/nwbinspector/checks/nwbfile_metadata.py → src/nwbinspector/checks/nwbfile_metadata.py b/nwbinspector/checks/nwbfile_metadata.py → src/nwbinspector/checks/nwbfile_metadata.py
diff --git a/nwbinspector/checks/ogen.py → src/nwbinspector/checks/ogen.py b/nwbinspector/checks/ogen.py → src/nwbinspector/checks/ogen.py
diff --git a/nwbinspector/checks/ophys.py → src/nwbinspector/checks/ophys.py b/nwbinspector/checks/ophys.py → src/nwbinspector/checks/ophys.py
diff --git a/nwbinspector/checks/tables.py → src/nwbinspector/checks/tables.py b/nwbinspector/checks/tables.py → src/nwbinspector/checks/tables.py
diff --git a/nwbinspector/checks/time_series.py → src/nwbinspector/checks/time_series.py b/nwbinspector/checks/time_series.py → src/nwbinspector/checks/time_series.py
diff --git a/nwbinspector/config.schema.json → src/nwbinspector/config.schema.json b/nwbinspector/config.schema.json → src/nwbinspector/config.schema.json
diff --git a/nwbinspector/inspector_tools.py → src/nwbinspector/inspector_tools.py b/nwbinspector/inspector_tools.py → src/nwbinspector/inspector_tools.py
diff --git a/...ernal_configs/dandi.inspector_config.yaml → ...ernal_configs/dandi.inspector_config.yaml b/...ernal_configs/dandi.inspector_config.yaml → ...ernal_configs/dandi.inspector_config.yaml
diff --git a/nwbinspector/nwbinspector.py → src/nwbinspector/nwbinspector.py b/nwbinspector/nwbinspector.py → src/nwbinspector/nwbinspector.py
diff --git a/nwbinspector/register_checks.py → src/nwbinspector/register_checks.py b/nwbinspector/register_checks.py → src/nwbinspector/register_checks.py
diff --git a/src/nwbinspector/testing.py b/src/nwbinspector/testing.py
@@ -0,0 +1,120 @@
+"""Helper functions for internal use across the testing suite."""
+import os
+import json
+from distutils.util import strtobool
+from pathlib import Path
+from typing import Tuple, Optional
+
+from packaging.version import Version
+from pynwb import NWBHDF5IO
+from pynwb.image import ImageSeries
+
+from .tools import check_streaming_enabled, make_minimal_nwbfile
+from .utils import is_module_installed, get_package_version
+
+
+def check_streaming_tests_enabled() -> Tuple[bool, Optional[str]]:
+    """
+    General purpose helper for determining if the testing environment can support S3 DANDI streaming.
+
+    Returns the boolean status of the check and, if False, provides a string reason for the failure for the user to
+    utilize as they please (raise an error or warning with that message, print it, or ignore it).
+    """
+    failure_reason = ""
+
+    environment_skip_flag = os.environ.get("NWBI_SKIP_NETWORK_TESTS", "")
+    environment_skip_flag_bool = (
+        strtobool(os.environ.get("NWBI_SKIP_NETWORK_TESTS", "")) if environment_skip_flag != "" else False
+    )
+    if environment_skip_flag_bool:
+        failure_reason += "Environmental variable set to skip network tets."
+
+    streaming_enabled, streaming_failure_reason = check_streaming_enabled()
+    if not streaming_enabled:
+        failure_reason += streaming_failure_reason
+
+    have_dandi = is_module_installed("dandi")
+    if not have_dandi:
+        failure_reason += "The DANDI package is not installed on the system."
+
+    failure_reason = None if failure_reason == "" else failure_reason
+    return streaming_enabled and not environment_skip_flag_bool and have_dandi, failure_reason
+
+
+def load_testing_config() -> dict:
+    """Helper function for loading the testing configuration file as a dictionary."""
+    test_config_file_path = Path(__file__).parent.parent.parent / "tests" / "testing_config.json"
+
+    # This error would only occur if someone installed a previous version
+    # directly from GitHub and then updated the branch/commit in-place
+    if not test_config_file_path.exists():  # pragma: no cover
+        raise FileNotFoundError(
+            "The testing configuration file not found at the location '{test_config_file_path}'! "
+            "Please try reinstalling the package."
+        )
+
+    with open(file=test_config_file_path) as file:
+        test_config = json.load(file)
+
+    return test_config
+
+
+def update_testing_config(key: str, value):
+    """Update a key/value pair in the testing configuration file through the API."""
+    test_config_file_path = Path(__file__).parent.parent.parent / "tests" / "testing_config.json"
+
+    testing_config = load_testing_config()
+
+    if key not in testing_config:
+        raise KeyError("Updating the testing configuration file via the API is only possible for the pre-defined keys!")
+    testing_config[key] = value
+
+    with open(file=test_config_file_path, mode="w") as file:
+        json.dump(testing_config, file)
+
+
+def generate_testing_files():  # pragma: no cover
+    """Generate a local copy of the NWB files required for all tests."""
+    generate_image_series_testing_files()
+
+
+def generate_image_series_testing_files():  # pragma: no cover
+    """Generate a local copy of the NWB files required for the image series tests."""
+    assert get_package_version(name="pynwb") == Version("2.1.0"), "Generating the testing files requires PyNWB v2.1.0!"
+
+    testing_config = load_testing_config()
+
+    local_path = Path(testing_config["LOCAL_PATH"])
+    local_path.mkdir(exist_ok=True, parents=True)
+
+    movie_1_file_path = local_path / "temp_movie_1.mov"
+    nested_folder = local_path / "nested_folder"
+    nested_folder.mkdir(exist_ok=True)
+    movie_2_file_path = nested_folder / "temp_movie_2.avi"
+    for file_path in [movie_1_file_path, movie_2_file_path]:
+        with open(file=file_path, mode="w") as file:
+            file.write("Not a movie file, but at least it exists.")
+    nwbfile = make_minimal_nwbfile()
+    nwbfile.add_acquisition(
+        ImageSeries(
+            name="TestImageSeriesGoodExternalPaths",
+            rate=1.0,
+            external_file=[
+                "/".join([".", movie_1_file_path.name]),
+                "/".join([".", nested_folder.name, movie_2_file_path.name]),
+            ],
+        )
+    )
+    nwbfile.add_acquisition(
+        ImageSeries(
+            name="TestImageSeriesExternalPathDoesNotExist",
+            rate=1.0,
+            external_file=["madeup_file.mp4"],
+        )
+    )
+    absolute_file_path = str(Path("madeup_file.mp4").absolute())
+    nwbfile.add_acquisition(
+        ImageSeries(name="TestImageSeriesExternalPathIsNotRelative", rate=1.0, external_file=[absolute_file_path])
+    )
+    with NWBHDF5IO(path=local_path / "image_series_testing_file.nwb", mode="w") as io:
+        io.write(nwbfile)
diff --git a/nwbinspector/tools.py → src/nwbinspector/tools.py b/nwbinspector/tools.py → src/nwbinspector/tools.py
diff --git a/nwbinspector/utils.py → src/nwbinspector/utils.py b/nwbinspector/utils.py → src/nwbinspector/utils.py
diff --git a/nwbinspector/version.py → src/nwbinspector/version.py b/nwbinspector/version.py → src/nwbinspector/version.py