diff --git a/.gitignore b/.gitignore index 756579b..5c9c73d 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,8 @@ SigMF.egg-info/* # test related .coverage -pytest.xml -coverage.xml +.hypothesis/ .tox/ +coverage.xml +pytest.xml htmlcov/* diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index 1b6f28c..15a6051 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -558,6 +558,12 @@ def set_data_file(self, data_file=None, data_buffer=None, skip_checksum=False, o self._memmap = raveled.reshape(mapped_reshape) self.shape = self._memmap.shape if (self._return_type is None) else self._memmap.shape[:-1] + if self.data_file is not None: + file_name = path.split(self.data_file)[1] + ext = path.splitext(file_name)[1] + if ext.lower() != SIGMF_DATASET_EXT: + self.set_global_field(SigMFFile.DATASET_KEY, file_name) + if skip_checksum: return None return self.calculate_hash() @@ -932,34 +938,36 @@ def get_dataset_filename_from_metadata(meta_fn, metadata=None): Parse provided metadata and return the expected data filename. In the case of a metadata only distribution, or if the file does not exist, this will return 'None'. The priority for conflicting: - 1. The file named .sigmf-meta if it exists - 2. The file in the `core:dataset` field (Non-Compliant Dataset) if it exists - 3. None (may be a metadata only distribution) + 1. The file named .SIGMF_DATASET_EXT if it exists + 2. The file in the DATASET_KEY field (Non-Compliant Dataset) if it exists + 3. None (may be a metadata only distribution) """ - compliant_data_fn = get_sigmf_filenames(meta_fn)["data_fn"] - noncompliant_data_fn = metadata["global"].get("core:dataset", None) + compliant_filename = get_sigmf_filenames(meta_fn)["data_fn"] + noncompliant_filename = metadata["global"].get(SigMFFile.DATASET_KEY, None) - if path.isfile(compliant_data_fn): - if noncompliant_data_fn: + if path.isfile(compliant_filename): + if noncompliant_filename: warnings.warn( - f"Compliant Dataset `{compliant_data_fn}` exists but " - f'"core:dataset" is also defined; using `{compliant_data_fn}`' + f"Compliant Dataset `{compliant_filename}` exists but " + f"{SigMFFile.DATASET_KEY} is also defined; using `{compliant_filename}`" ) - return compliant_data_fn - - elif noncompliant_data_fn: - if path.isfile(noncompliant_data_fn): - if metadata["global"].get("core:metadata_only", False): - warnings.warn( - 'Schema defines "core:dataset" but "core:meatadata_only" ' - f"also exists; using `{noncompliant_data_fn}`" + return compliant_filename + + elif noncompliant_filename: + dir_path = path.split(meta_fn)[0] + noncompliant_data_file_path = path.join(dir_path, noncompliant_filename) + if path.isfile(noncompliant_data_file_path): + if metadata["global"].get(SigMFFile.METADATA_ONLY_KEY, False): + raise SigMFFileError( + f"Schema defines {SigMFFile.DATASET_KEY} " + f"but {SigMFFile.METADATA_ONLY_KEY} also exists; using `{noncompliant_filename}`" ) - return noncompliant_data_fn + return noncompliant_data_file_path else: - warnings.warn( - f"Non-Compliant Dataset `{noncompliant_data_fn}` is specified " 'in "core:dataset" but does not exist!' + raise SigMFFileError( + f"Non-Compliant Dataset `{noncompliant_filename}` is specified in {SigMFFile.DATASET_KEY} " + "but does not exist!" ) - return None diff --git a/tests/conftest.py b/tests/conftest.py index 0e46aaf..a5379ef 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,6 +11,7 @@ import pytest from sigmf import __specification__ +from sigmf.archive import SIGMF_DATASET_EXT from sigmf.sigmffile import SigMFFile from .testdata import TEST_FLOAT32_DATA, TEST_METADATA @@ -18,8 +19,8 @@ @pytest.fixture def test_data_file(): - """when called, yields temporary file""" - with tempfile.NamedTemporaryFile() as temp: + """when called, yields temporary dataset""" + with tempfile.NamedTemporaryFile(suffix=f".{SIGMF_DATASET_EXT}") as temp: TEST_FLOAT32_DATA.tofile(temp.name) yield temp diff --git a/tests/test_ncd.py b/tests/test_ncd.py new file mode 100644 index 0000000..5978cbf --- /dev/null +++ b/tests/test_ncd.py @@ -0,0 +1,64 @@ +# Copyright: Multiple Authors +# +# This file is part of sigmf-python. https://github.com/sigmf/sigmf-python +# +# SPDX-License-Identifier: LGPL-3.0-or-later + +"""Tests for Non-Conforming Datasets""" + +import copy +import os +import shutil +import tempfile +import unittest +from pathlib import Path + +import numpy as np +from hypothesis import given +from hypothesis import strategies as st + +from sigmf.error import SigMFFileError +from sigmf.sigmffile import SigMFFile, fromfile + +from .testdata import TEST_FLOAT32_DATA, TEST_METADATA + + +class TestNonConformingDataset(unittest.TestCase): + """unit tests for NCD""" + + def setUp(self): + """create temporary path""" + self.temp_dir = Path(tempfile.mkdtemp()) + + def tearDown(self): + """remove temporary path""" + shutil.rmtree(self.temp_dir) + + @given(st.sampled_from([".", "subdir/", "sub0/sub1/sub2/"])) + def test_load_ncd(self, subdir: str) -> None: + """test loading non-conforming dataset""" + data_path = self.temp_dir / subdir / "dat.bin" + meta_path = self.temp_dir / subdir / "dat.sigmf-meta" + os.makedirs(data_path.parent, exist_ok=True) + + # create data file + TEST_FLOAT32_DATA.tofile(data_path) + + # create metadata file + ncd_metadata = copy.deepcopy(TEST_METADATA) + meta = SigMFFile(metadata=ncd_metadata, data_file=data_path) + meta.tofile(meta_path) + + # load dataset & validate we can read all the data + meta_loopback = fromfile(meta_path) + self.assertTrue(np.array_equal(TEST_FLOAT32_DATA, meta_loopback.read_samples())) + self.assertTrue(np.array_equal(TEST_FLOAT32_DATA, meta_loopback[:])) + + # delete the non-conforming dataset and ensure error is raised due to missing dataset; + # in Windows the SigMFFile instances need to be garbage collected first, + # otherwise the np.memmap instances (stored in self._memmap) block the deletion + meta = None + meta_loopback = None + os.remove(data_path) + with self.assertRaises(SigMFFileError): + _ = fromfile(meta_path)