From cb8962a260c4693481ee1486de610dd269c72a15 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 17 Jan 2025 16:06:16 +0000 Subject: [PATCH 01/14] Set up pooch registry fixture for tests --- tests/conftest.py | 118 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 tests/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..c0f4127 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,118 @@ +"""Pytest configuration file with shared fixtures across all tests.""" + +from collections.abc import Callable +from pathlib import Path + +import pooch +import pytest + +# load fixtures defined as modules +pytest_plugins = [ + "tests.fixtures.annotations", +] + +GIN_TEST_DATA_REPO = ( + "https://gin.g-node.org/neuroinformatics/ethology-test-data" +) + + +@pytest.fixture(scope="session") +def pooch_registry() -> pooch.Pooch: + """Return pooch registry with the test data. + + This fixture is common to the entire test session. This means that the + file registry is downloaded fresh for every test session. 
+ + Returns + ------- + pooch.Pooch + A Pooch object that holds the URL and hash of the GIN repository with + the test data + + """ + # Cache the test data in the user's home directory + test_data_dir = Path.home() / ".ethology-test-data" + + # Remove the file registry if it exists + # (required in order to download it from scratch every time) + file_registry_path = test_data_dir / "files-registry.txt" + if file_registry_path.is_file(): + Path(file_registry_path).unlink() + + # Initialise pooch registry + registry = pooch.create( + test_data_dir, + base_url=f"{GIN_TEST_DATA_REPO}/raw/master/test_data", + ) + + # Download only the registry file from GIN + file_registry = pooch.retrieve( + url=f"{GIN_TEST_DATA_REPO}/raw/master/files-registry.txt", + known_hash=None, + fname=file_registry_path.name, + path=file_registry_path.parent, + ) + + # Load registry file onto pooch registry + registry.load_registry(file_registry) + + return registry + + +@pytest.fixture() +def get_paths_test_data() -> Callable[[dict, str], dict]: + """Get paths of the test data files under a specific subdirectory in the + GIN repository. + + This fixture is a factory of fixtures. It returns a function that can be + used to create a fixture that is a dictionary holding the paths under the + given `subdir_name`. + """ + + def _get_paths_test_data( + pooch_registry: pooch.Pooch, subdir_name: str + ) -> dict: + """Return the paths of the test files under the specified subdirectory. + + Parameters + ---------- + pooch_registry : pooch.Pooch + Pooch registry with the test data. + subdir_name : str + Name of the subdirectory under test_data for which to get the + paths. + + Returns + ------- + dict + Dictionary with the requested filenames as keys and the paths as + values. + + Notes + ----- + The name of the subdirectories is intended to match a testing module. 
+ For example, to get the paths of the files used to test the annotations + module, we call `get_paths_test_data(pooch_registry, + "test_annotations")`. This assumes that in the GIN repository there is + a subdirectory named `test_annotations` under the `test_data` + directory with the relevant files. + + """ + filename_to_path = {} + + # In the pooch registry, each file is indexed by its path relative to + # the test_data directory. + for relative_filepath in pooch_registry.registry: + if relative_filepath.startswith(f"{subdir_name}/"): + fetched_filepath = Path( + pooch_registry.fetch( + relative_filepath, # under test_data + progressbar=True, + ) + ) + + filename_to_path[fetched_filepath.name] = fetched_filepath + + return filename_to_path + + return _get_paths_test_data From 9aaa6ab4ad3d466c0298092dc835a551ac7512a0 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 17 Jan 2025 16:07:12 +0000 Subject: [PATCH 02/14] Add annotation specific fixtures --- .pre-commit-config.yaml | 1 + tests/fixtures/__init__.py | 0 tests/fixtures/annotations.py | 16 ++++++++++++++++ 3 files changed, 17 insertions(+) create mode 100644 tests/fixtures/__init__.py create mode 100644 tests/fixtures/annotations.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8c8a2be..99263f3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,6 +19,7 @@ repos: args: [--fix=lf] - id: name-tests-test args: ["--pytest-test-first"] + exclude: ^tests/fixtures - id: requirements-txt-fixer - id: trailing-whitespace - repo: https://github.com/pre-commit/pygrep-hooks diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/fixtures/annotations.py b/tests/fixtures/annotations.py new file mode 100644 index 0000000..6ee6e09 --- /dev/null +++ b/tests/fixtures/annotations.py @@ -0,0 +1,16 @@ +"""Pytest fixtures shared across annotations tests.""" + +from 
collections.abc import Callable + +import pooch +import pytest + + +@pytest.fixture() +def annotations_test_data( + pooch_registry: pooch.Pooch, get_paths_test_data: Callable +) -> dict: + """Return the paths of the test files under the annotations subdirectory + in the GIN test data repository. + """ + return get_paths_test_data(pooch_registry, "test_annotations") From e1e889dfe888ab2af6c7817f768330908b384408 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 17 Jan 2025 16:09:18 +0000 Subject: [PATCH 03/14] Delete placeholder test --- tests/test_unit/test_placeholder.py | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 tests/test_unit/test_placeholder.py diff --git a/tests/test_unit/test_placeholder.py b/tests/test_unit/test_placeholder.py deleted file mode 100644 index 3ada1ee..0000000 --- a/tests/test_unit/test_placeholder.py +++ /dev/null @@ -1,2 +0,0 @@ -def test_placeholder(): - assert True From 49fc060a75619bf87a521528e809f35a9a6a3bfd Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 17 Jan 2025 16:23:20 +0000 Subject: [PATCH 04/14] Recover placeholder for CI to pass --- tests/test_unit/test_annotations/__init__.py | 0 tests/test_unit/test_annotations/test_placeholder.py | 2 ++ 2 files changed, 2 insertions(+) create mode 100644 tests/test_unit/test_annotations/__init__.py create mode 100644 tests/test_unit/test_annotations/test_placeholder.py diff --git a/tests/test_unit/test_annotations/__init__.py b/tests/test_unit/test_annotations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_unit/test_annotations/test_placeholder.py b/tests/test_unit/test_annotations/test_placeholder.py new file mode 100644 index 0000000..3ada1ee --- /dev/null +++ b/tests/test_unit/test_annotations/test_placeholder.py @@ -0,0 +1,2 @@ +def test_placeholder(): + assert True From 9bdcffdade60b9f571c3935a23fab515388d8f63 Mon Sep 17 00:00:00 2001 From: sfmig 
<33267254+sfmig@users.noreply.github.com> Date: Tue, 21 Jan 2025 17:49:21 +0000 Subject: [PATCH 05/14] Add json schemas --- MANIFEST.in | 4 + ethology/annotations/json_schemas/__init__.py | 0 .../json_schemas/schemas/README.md | 32 ++++ .../json_schemas/schemas/coco_schema.json | 78 +++++++++ .../json_schemas/schemas/via_schema.json | 88 ++++++++++ ethology/annotations/json_schemas/utils.py | 159 ++++++++++++++++++ 6 files changed, 361 insertions(+) create mode 100644 ethology/annotations/json_schemas/__init__.py create mode 100644 ethology/annotations/json_schemas/schemas/README.md create mode 100644 ethology/annotations/json_schemas/schemas/coco_schema.json create mode 100644 ethology/annotations/json_schemas/schemas/via_schema.json create mode 100644 ethology/annotations/json_schemas/utils.py diff --git a/MANIFEST.in b/MANIFEST.in index e16ea33..8d2ac11 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -6,3 +6,7 @@ recursive-exclude * __pycache__ recursive-exclude * *.py[co] recursive-exclude docs * recursive-exclude tests * + +# Include json schemas +recursive-include ethology *.json +recursive-include ethology *.md diff --git a/ethology/annotations/json_schemas/__init__.py b/ethology/annotations/json_schemas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ethology/annotations/json_schemas/schemas/README.md b/ethology/annotations/json_schemas/schemas/README.md new file mode 100644 index 0000000..f7a8e1b --- /dev/null +++ b/ethology/annotations/json_schemas/schemas/README.md @@ -0,0 +1,32 @@ +## JSON schemas for manual annotations files. + +We use JSON schemas to validate the types of a supported annotation file. + +Note that the schema validation only checks the type of a key if that key is present. It does not check for the presence of the keys. + +If the meta-schema (under $schema) is not provided, the jsonschema validator uses the latest released draft of the JSON schema specification. 
+ +## VIA schema + +The VIA schema corresponds to the format exported by VGG Image Annotator 2.x.y (VIA) for object detection annotations. + +Each image under `_via_img_metadata` is indexed using a unique key: FILENAME-FILESIZE. We use "additionalProperties" to allow for any key name, see https://stackoverflow.com/a/69811612/24834957. + +The section `_via_image_id_list` contains an ordered list of image keys using a unique key: `FILENAME-FILESIZE`, the position in the list defines the image ID. + +The section `_via_attributes` contains region attributes and file attributes, to display in VIA's UI and to classify the data. + +The section `_via_data_format_version` contains the version of the VIA tool used. + + +## COCO schema +The COCO schema follows the COCO dataset format for object detection, see https://cocodataset.org/#format-data. + +Box coordinates are measured from the top left corner of the image, and are 0-indexed. +### References +---------- +- https://github.com/python-jsonschema/jsonschema +- https://json-schema.org/understanding-json-schema/ +- https://cocodataset.org/#format-data +- https://gitlab.com/vgg/via/-/blob/master/via-2.x.y/CodeDoc.md?ref_type=heads#description-of-via-project-json-file +- https://python-jsonschema.readthedocs.io/en/stable/api/#jsonschema.validate diff --git a/ethology/annotations/json_schemas/schemas/coco_schema.json b/ethology/annotations/json_schemas/schemas/coco_schema.json new file mode 100644 index 0000000..3793027 --- /dev/null +++ b/ethology/annotations/json_schemas/schemas/coco_schema.json @@ -0,0 +1,78 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "info": { + "type": "object" + }, + "licenses": { + "type": "array" + }, + "images": { + "type": "array", + "items": { + "type": "object", + "properties": { + "file_name": { + "type": "string" + }, + "id": { + "type": "integer" + }, + "width": { + "type": "integer" + }, + "height": { + "type": "integer" + } + } + } + }, + 
"annotations": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "image_id": { + "type": "integer" + }, + "bbox": { + "type": "array", + "items": { + "type": "integer" + } + }, + "category_id": { + "type": "integer" + }, + "area": { + "type": "number" + }, + "iscrowd": { + "type": "integer" + } + } + } + }, + "categories": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "name": { + "type": "string" + }, + "supercategory": { + "type": "string" + } + } + } + } + } +} diff --git a/ethology/annotations/json_schemas/schemas/via_schema.json b/ethology/annotations/json_schemas/schemas/via_schema.json new file mode 100644 index 0000000..8017a90 --- /dev/null +++ b/ethology/annotations/json_schemas/schemas/via_schema.json @@ -0,0 +1,88 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "_via_settings": { + "type": "object", + "properties": { + "ui": { + "type": "object" + }, + "core": { + "type": "object" + }, + "project": { + "type": "object" + } + } + }, + "_via_img_metadata": { + "type": "object", + "additionalProperties": { + "type": "object", + "properties": { + "filename": { + "type": "string" + }, + "size": { + "type": "integer" + }, + "regions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "shape_attributes": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "x": { + "type": "integer" + }, + "y": { + "type": "integer" + }, + "width": { + "type": "integer" + }, + "height": { + "type": "integer" + } + } + }, + "region_attributes": { + "type": "object" + } + } + } + }, + "file_attributes": { + "type": "object" + } + } + } + }, + "_via_image_id_list": { + "type": "array", + "items": { + "type": "string" + } + }, + "_via_attributes": { + "type": "object", + "properties": { + "region": { + "type": "object" + }, + "file": { + "type": "object" + 
} + } + }, + "_via_data_format_version": { + "type": "string" + } + } +} diff --git a/ethology/annotations/json_schemas/utils.py b/ethology/annotations/json_schemas/utils.py new file mode 100644 index 0000000..04c2c90 --- /dev/null +++ b/ethology/annotations/json_schemas/utils.py @@ -0,0 +1,159 @@ +"""Utility functions for JSON schema files.""" + +import json +from pathlib import Path + +import jsonschema +import jsonschema.exceptions + + +def _get_default_VIA_schema() -> dict: + """Read a VIA schema file.""" + via_schema_path = Path(__file__).parent / "schemas" / "via_schema.json" + with open(via_schema_path) as file: + via_schema_dict = json.load(file) + return via_schema_dict + + +def _get_default_COCO_schema() -> dict: + """Read a COCO schema file.""" + coco_schema_path = Path(__file__).parent / "schemas" / "coco_schema.json" + with open(coco_schema_path) as file: + coco_schema_dict = json.load(file) + return coco_schema_dict + + +def _check_file_is_json(filepath: Path): + """Ensure that the file is a JSON file.""" + try: + with open(filepath) as file: + json.load(file) + except FileNotFoundError as not_found_error: + raise FileNotFoundError( + f"File not found: {filepath}." + ) from not_found_error + except json.JSONDecodeError as decode_error: + raise ValueError( + f"Error decoding JSON data from file: {filepath}." + ) from decode_error + + +def _check_file_matches_schema(filepath: Path, schema: dict): + """Ensure that the JSON file matches the expected schema. + + The schema validation only checks the type for each specified + key if the key exists. It does not check that the keys in the + schema are present in the JSON file. 
+ """ + # read json file + with open(filepath) as file: + data = json.load(file) + + # check against schema if provided + if schema: + try: + jsonschema.validate(instance=data, schema=schema) + except jsonschema.exceptions.ValidationError as val_err: + raise val_err + except jsonschema.exceptions.SchemaError as schema_err: + raise schema_err + + +def _check_required_properties_keys( + required_properties_keys: list, schema: dict +): + """Ensure that the input schema includes the required "properties" keys.""" + # Get keys of "properties" dictionaries in schema + properties_keys_in_schema = _extract_properties_keys(schema) + + # Get list of "properties" keys that are required but not in schema + missing_keys = set(required_properties_keys) - set( + properties_keys_in_schema + ) + + # Raise error if there are missing keys in the schema + if missing_keys: + raise ValueError( + f"Required key(s) {sorted(missing_keys)} not found " + "in schema. Note that " + "a key may not be found correctly if the schema keywords " + "(such as 'properties', 'type' or 'items') are not spelt " + "correctly." + ) + + +def _check_required_keys_in_dict( + list_required_keys: list[str], + data: dict, + additional_message: str = "", +): + """Check if the required keys are present in the input data_dict.""" + missing_keys = set(list_required_keys) - data.keys() + if missing_keys: + raise ValueError( + f"Required key(s) {sorted(missing_keys)} not " + f"found{additional_message}." + ) + + +def _extract_properties_keys(schema: dict, parent_key="") -> list: + """Recursively extract the keys of all "properties" subdictionaries. + + Recursively extract the keys of all subdictionaries in the input + dictionary that are values to a "properties" key. The input dictionary + represents a JSON schema dictionary + (see https://json-schema.org/understanding-json-schema/about). + + The "properties" key always appears as part of a dictionary with at least + another key, that is "type" or "items". 
+ """ + # The "property keys" are either "properties" or "additionalProperties" + # as they are the keys with the relevant data + property_keys = ["properties", "additionalProperties"] + + def _contains_properties_key(input: dict): + """Return True if the input dictionary contains a property key.""" + return any(x in input for x in property_keys) + + def _get_properties_subdict(input: dict): + """Get the subdictionary under the property key.""" + return input[next(k for k in property_keys if k in input)] + + keys_of_properties_dicts = [] + if "type" in schema: + if _contains_properties_key(schema): + # Get the subdictionary under the properties key + properties_subdict = _get_properties_subdict(schema) + + # Check if there is a nested "properties" dict inside the current + # one. If so, go down one level. + if _contains_properties_key(properties_subdict): + properties_subdict = _get_properties_subdict( + properties_subdict + ) + + # Add keys of deepest "properties dict" to list + keys_of_properties_dicts.extend( + [ + f"{parent_key}/{ky}" if parent_key else ky + for ky in properties_subdict + ] + ) + + # Inspect non-properties dictionaries under this properties subdict + for ky, val in properties_subdict.items(): + full_key = f"{parent_key}/{ky}" if parent_key else ky + keys_of_properties_dicts.extend( + _extract_properties_keys(val, full_key) + ) + + elif "items" in schema: + # Analyse the dictionary under the "items" key + properties_subdict = schema["items"] + keys_of_properties_dicts.extend( + _extract_properties_keys( + properties_subdict, parent_key=parent_key + ) + ) + + return sorted(keys_of_properties_dicts) From 289e6d19245fe65350f9d090078045bee2bf4980 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 21 Jan 2025 17:50:00 +0000 Subject: [PATCH 06/14] Add validators for VIA and COCO files --- ethology/annotations/validators.py | 183 +++++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100644 
ethology/annotations/validators.py diff --git a/ethology/annotations/validators.py b/ethology/annotations/validators.py new file mode 100644 index 0000000..c654b99 --- /dev/null +++ b/ethology/annotations/validators.py @@ -0,0 +1,183 @@ +"""Validators for supported annotation files.""" + +import json +from pathlib import Path + +from attrs import define, field + +from ethology.annotations.json_schemas.utils import ( + _check_file_is_json, + _check_file_matches_schema, + _check_required_keys_in_dict, + _get_default_COCO_schema, + _get_default_VIA_schema, +) + + +@define +class ValidVIA: + """Class for valid VIA JSON files. + + It checks the input file is a `ValidJSON` and additionally checks the + file contains the required keys. + + Attributes + ---------- + path : pathlib.Path + Path to the VIA JSON file. + + schema : dict + The JSON schema is set to VIA_SCHEMA. + + Raises + ------ + FileNotFoundError + If the file does not exist. + ValueError + If the JSON file cannot be decoded. + jsonschema.exceptions.ValidationError + If the type of any of the keys in the JSON file + does not match the type specified in the schema. + ValueError + If the VIA JSON file misses any of the required keys. 
+ + """ + + path: Path = field() + schema: dict = field( + default=_get_default_VIA_schema(), + init=False, + ) + required_keys: dict = field( + default={ + "main": ["_via_img_metadata", "_via_image_id_list"], + "images": ["filename", "regions"], + "regions": ["shape_attributes", "region_attributes"], + "shape_attributes": ["x", "y", "width", "height"], + }, + init=False, + ) + + @path.validator + def _file_is_json(self, attribute, value): + _check_file_is_json(value) + + @path.validator + def _file_matches_JSON_schema(self, attribute, value): + _check_file_matches_schema(value, self.schema) + + @path.validator + def _file_contains_required_keys(self, attribute, value): + """Ensure that the VIA JSON file contains the required keys.""" + # Read data as dict + with open(value) as file: + data = json.load(file) + + # Check first level keys + _check_required_keys_in_dict(self.required_keys["main"], data) + + # Check keys in nested dicts + for img_str, img_dict in data["_via_img_metadata"].items(): + # Check keys for each image dictionary + _check_required_keys_in_dict( + self.required_keys["images"], + img_dict, + additional_message=f" for {img_str}", + ) + + # Check keys for each region in an image + for i, region in enumerate(img_dict["regions"]): + # Check keys under first level per region + _check_required_keys_in_dict( + self.required_keys["regions"], + region, + additional_message=f" for region {i} under {img_str}", + ) + + # Check keys under "shape_attributes" per region + _check_required_keys_in_dict( + self.required_keys["shape_attributes"], + region["shape_attributes"], + additional_message=f" for region {i} under {img_str}", + ) + + +@define +class ValidCOCO: + """Class for valid COCO JSON files for untracked data. + + It checks the input COCO JSON file contains the required keys. + + Attributes + ---------- + path : pathlib.Path + Path to the COCO JSON file. + + Raises + ------ + FileNotFoundError + If the file does not exist. 
+ ValueError + If the JSON file cannot be decoded. + jsonschema.exceptions.ValidationError + If the type of any of the keys in the JSON file + does not match the type specified in the schema. + ValueError + If the COCO JSON file misses any of the required keys. + + """ + + path: Path = field() + schema: dict = field( + default=_get_default_COCO_schema(), + init=False, + # init=False makes the attribute to be unconditionally initialized + # with the specified default + ) + + # The keys of "required_keys" match the 1st level keys in a COCO JSON file + required_keys: dict = field( + default={ + "main": ["images", "annotations", "categories"], + "images": ["id", "file_name"], + "annotations": ["id", "image_id", "bbox", "category_id"], + "categories": ["id", "name", "supercategory"], + }, + init=False, + ) + + @path.validator + def _file_is_json(self, attribute, value): + _check_file_is_json(value) + + @path.validator + def _file_matches_JSON_schema(self, attribute, value): + _check_file_matches_schema(value, self.schema) + + @path.validator + def _file_contains_required_keys(self, attribute, value): + """Ensure that the COCO JSON file contains the required keys.""" + + # Helper function to singularise the input key for the + # error message + def _singularise_err_msg(key): + return key[:-1] if key != "categories" else key[:-3] + "y" + + # Read file as dict + with open(value) as file: + data = json.load(file) + + # Check first level keys + _check_required_keys_in_dict(self.required_keys["main"], data) + + # Check keys in every dict listed under the "images", "annotations" + # and "categories" keys + for ky in list(self.required_keys.keys())[1:]: + for instance_dict in data[ky]: + _check_required_keys_in_dict( + self.required_keys[ky], + instance_dict, + additional_message=( + f" for {_singularise_err_msg(ky)} {instance_dict}" + ), + ) From 20b72f29f7fc17bf694fcb3343bd13bbf27a84ba Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 
21 Jan 2025 17:54:21 +0000 Subject: [PATCH 07/14] Update MANIFEST --- MANIFEST.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 8d2ac11..63adff3 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -8,5 +8,5 @@ recursive-exclude docs * recursive-exclude tests * # Include json schemas -recursive-include ethology *.json -recursive-include ethology *.md +recursive-include ethology/annotations/json_schemas/schemas *.json +recursive-include ethology/annotations/json_schemas/schemas *.md From 599fe87ea2d358e36c9bfa392d28b2cdd87a7486 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 21 Jan 2025 17:56:18 +0000 Subject: [PATCH 08/14] Add tests for supported validators --- .../test_annotations/test_validators.py | 437 ++++++++++++++++++ 1 file changed, 437 insertions(+) create mode 100644 tests/test_unit/test_annotations/test_validators.py diff --git a/tests/test_unit/test_annotations/test_validators.py b/tests/test_unit/test_annotations/test_validators.py new file mode 100644 index 0000000..8fb697d --- /dev/null +++ b/tests/test_unit/test_annotations/test_validators.py @@ -0,0 +1,437 @@ +import json +from contextlib import nullcontext as does_not_raise +from pathlib import Path + +import jsonschema +import pytest + +from ethology.annotations.json_schemas.utils import ( + _check_required_keys_in_dict, + _check_required_properties_keys, + _extract_properties_keys, +) +from ethology.annotations.validators import ValidCOCO, ValidVIA + + +@pytest.fixture() +def json_file_decode_error(tmp_path: Path) -> Path: + """Return path to a JSON file with a decoding error.""" + json_file = tmp_path / "JSON_decode_error.json" + with open(json_file, "w") as f: + f.write("just-a-string") + return json_file + + +@pytest.fixture() +def json_file_not_found_error(tmp_path: Path) -> Path: + """Return path to a JSON file that does not exist.""" + return tmp_path / "JSON_file_not_found.json" + + 
+@pytest.fixture() +def VIA_file_schema_mismatch( + annotations_test_data: dict, + tmp_path: Path, +) -> Path: + """Return path to a VIA JSON file that does not match its schema. + + Specifically, we modify the type of the "width" of the first bounding box + in the first image, from "int" to "str" + """ + # Read valid JSON file + valid_via_file_sample_1 = annotations_test_data["VIA_JSON_sample_1.json"] + with open(valid_via_file_sample_1) as f: + data = json.load(f) + + # Modify file so that it doesn't match the corresponding schema + # (make width a string) + _, img_dict = list(data["_via_img_metadata"].items())[0] + img_dict["regions"][0]["shape_attributes"]["width"] = "49" + + # Save the modified JSON to a new file + out_json = tmp_path / f"{valid_via_file_sample_1.stem}_schema_error.json" + with open(out_json, "w") as f: + json.dump(data, f) + return out_json + + +@pytest.fixture() +def COCO_file_schema_mismatch( + annotations_test_data: dict, + tmp_path: Path, +) -> Path: + """Return path to a COCO JSON file that doesn't match its schema. + + Specifically, we modify the type of the object under the "annotations" + key from "list of dicts" to "list" + """ + # Read valid JSON file + valid_coco_file_sample_1 = annotations_test_data["COCO_JSON_sample_1.json"] + with open(valid_coco_file_sample_1) as f: + data = json.load(f) + + # Modify file so that it doesn't match the corresponding schema + data["annotations"] = [1, 2, 3] # [d] for d in data["annotations"]] + + # save the modified json to a new file + out_json = tmp_path / f"{valid_coco_file_sample_1.stem}_schema_error.json" + with open(out_json, "w") as f: + json.dump(data, f) + return out_json + + +@pytest.fixture() +def small_schema() -> dict: + """Small schema with properties keys: + ["a", "b", "b/b1", "c", "c/c1", "c/c2"]. 
+ """ + return { + "type": "object", + "properties": { + "a": { + "type": "array", + "items": {"type": "string"}, + }, + "b": { + "type": "object", + "properties": {"b1": {"type": "string"}}, + }, + "c": { + "type": "object", + "properties": { + "c1": {"type": "string"}, + "c2": {"type": "string"}, + }, + }, + }, + } + + +@pytest.fixture() +def default_VIA_schema() -> dict: + """Get default VIA schema.""" + from ethology.annotations.json_schemas.utils import _get_default_VIA_schema + + return _get_default_VIA_schema() + + +@pytest.fixture() +def default_COCO_schema() -> dict: + """Get default COCO schema.""" + from ethology.annotations.json_schemas.utils import ( + _get_default_COCO_schema, + ) + + return _get_default_COCO_schema() + + +@pytest.mark.parametrize( + "input_file,", + [ + "VIA_JSON_sample_1.json", + "VIA_JSON_sample_2.json", + ], +) +def test_valid_VIA(input_file: str, annotations_test_data: dict): + """Test the VIA validator with valid inputs.""" + filepath = annotations_test_data[input_file] + with does_not_raise(): + ValidVIA(path=filepath) + + +@pytest.mark.parametrize( + "invalid_input_file, expected_exception, log_message", + [ + ( + "json_file_decode_error", + pytest.raises(ValueError), + "Error decoding JSON data from file", + ), + ( + "json_file_not_found_error", + pytest.raises(FileNotFoundError), + "File not found", + ), + ( + "VIA_file_schema_mismatch", + pytest.raises(jsonschema.exceptions.ValidationError), + "'49' is not of type 'integer'", + ), + ], +) +def test_valid_VIA_invalid_files( + invalid_input_file: str, + expected_exception: pytest.raises, + log_message: str, + request: pytest.FixtureRequest, +): + """Test the VIA validator throws the expected errors when passed invalid + inputs. 
+ """ + invalid_json_file = request.getfixturevalue(invalid_input_file) + + with expected_exception as excinfo: + ValidVIA(path=invalid_json_file) + + # Check that the error message contains expected string + assert log_message in str(excinfo.value) + + # Check the error message contains file path + if not isinstance(excinfo.value, jsonschema.exceptions.ValidationError): + assert invalid_json_file.name in str(excinfo.value) + + +@pytest.mark.parametrize( + "input_file", + [ + "COCO_JSON_sample_1.json", + "COCO_JSON_sample_2.json", + ], +) +def test_valid_COCO(input_file: str, annotations_test_data: dict): + """Test the COCO validator with valid inputs.""" + filepath = annotations_test_data[input_file] + with does_not_raise(): + ValidCOCO(path=filepath) + + +@pytest.mark.parametrize( + "invalid_input_file, expected_exception, log_message", + [ + ( + "json_file_decode_error", + pytest.raises(ValueError), + "Error decoding JSON data from file", + ), + ( + "json_file_not_found_error", + pytest.raises(FileNotFoundError), + "File not found", + ), + ( + "COCO_file_schema_mismatch", + pytest.raises(jsonschema.exceptions.ValidationError), + "3 is not of type 'object'", + ), + ], +) +def test_valid_COCO_invalid_files( + invalid_input_file: str, + expected_exception: pytest.raises, + log_message: str, + request: pytest.FixtureRequest, +): + """Test the COCO validator throws the expected errors when passed invalid + inputs. 
+ """ + invalid_json_file = request.getfixturevalue(invalid_input_file) + + with expected_exception as excinfo: + ValidCOCO(path=invalid_json_file) + + # Check that the error message contains expected string + assert log_message in str(excinfo.value) + + # Check the error message contains file path + # assert invalid_json_file.name in str(excinfo.value) + if not isinstance(excinfo.value, jsonschema.exceptions.ValidationError): + assert invalid_json_file.name in str(excinfo.value) + + +@pytest.mark.parametrize( + "schema, expected_properties_keys", + [ + ("small_schema", ["a", "b", "b/b1", "c", "c/c1", "c/c2"]), + ( + "default_VIA_schema", + [ + "_via_attributes", + "_via_attributes/file", + "_via_attributes/region", + "_via_data_format_version", + "_via_image_id_list", + "_via_img_metadata", + "_via_img_metadata/file_attributes", + "_via_img_metadata/filename", + "_via_img_metadata/regions", + "_via_img_metadata/regions/region_attributes", + "_via_img_metadata/regions/shape_attributes", + "_via_img_metadata/regions/shape_attributes/height", + "_via_img_metadata/regions/shape_attributes/name", + "_via_img_metadata/regions/shape_attributes/width", + "_via_img_metadata/regions/shape_attributes/x", + "_via_img_metadata/regions/shape_attributes/y", + "_via_img_metadata/size", + "_via_settings", + "_via_settings/core", + "_via_settings/project", + "_via_settings/ui", + ], + ), + ( + "default_COCO_schema", + [ + "annotations", + "annotations/area", + "annotations/bbox", + "annotations/category_id", + "annotations/id", + "annotations/image_id", + "annotations/iscrowd", + "categories", + "categories/id", + "categories/name", + "categories/supercategory", + "images", + "images/file_name", + "images/height", + "images/id", + "images/width", + "info", + "licenses", + ], + ), + ], +) +def test_extract_properties_keys( + schema: dict, + expected_properties_keys: list, + request: pytest.FixtureRequest, +): + """Test the _extract_properties_keys helper function.""" + schema = 
request.getfixturevalue(schema) + assert _extract_properties_keys(schema) == sorted(expected_properties_keys) + + +@pytest.mark.parametrize( + "list_required_keys, data_dict, additional_message, expected_exception", + [ + ( + ["images", "annotations", "categories"], + {"images": "", "annotations": "", "categories": ""}, + "", + does_not_raise(), + ), # zero missing keys + ( + ["images", "annotations", "categories"], + {"annotations": "", "categories": ""}, + "", + pytest.raises(ValueError), + ), # one missing key + ( + ["images", "annotations", "categories"], + {"annotations": ""}, + "", + pytest.raises(ValueError), + ), # two missing keys + ( + ["images", "annotations", "categories"], + {"annotations": "", "categories": ""}, + "FOO", + pytest.raises(ValueError), + ), # one missing key with additional message + ], +) +def test_check_required_keys_in_dict( + list_required_keys: list, + data_dict: dict, + additional_message: str, + expected_exception: pytest.raises, +): + """Test the _check_required_keys_in_dict helper function.""" + with expected_exception as excinfo: + _check_required_keys_in_dict( + list_required_keys, data_dict, additional_message + ) + + if excinfo: + missing_keys = set(list_required_keys) - data_dict.keys() + assert str(excinfo.value) == ( + f"Required key(s) {sorted(missing_keys)} " + f"not found{additional_message}." + ) + + +def test_check_required_properties_keys(small_schema: dict): + """Test the _check_required_keys helper function.""" + # Define a sample schema from "small_schema" + # with a "properties" key missing (e.g. 
"c/c2") + small_schema["properties"]["c"]["properties"].pop("c2") + + # Define required "properties" keys + required_keys = ["a", "b", "c/c2"] + + # Run check + with pytest.raises(ValueError) as excinfo: + _check_required_properties_keys(required_keys, small_schema) + + # Check error message + assert "Required key(s) ['c/c2'] not found in schema" in str(excinfo.value) + + +@pytest.mark.parametrize( + "input_file,", + [ + "VIA_JSON_sample_1.json", + "VIA_JSON_sample_2.json", + ], +) +def test_required_keys_in_VIA_schema( + input_file: str, default_VIA_schema: dict, annotations_test_data: dict +): + """Check the provided VIA schema contains the ValidVIA required keys.""" + # Get required keys from a VIA valid file + filepath = annotations_test_data[input_file] + valid_via = ValidVIA(path=filepath) + required_VIA_keys = valid_via.required_keys + + # Map required keys to "properties" keys in schema + map_required_to_properties_keys = { + "main": "", + "images": "_via_img_metadata", + "regions": "_via_img_metadata/regions", + "shape_attributes": "_via_img_metadata/regions/shape_attributes", + } + + # Express required keys as required "properties" keys + required_property_keys = [ + val if ky == "main" else f"{map_required_to_properties_keys[ky]}/{val}" + for ky, values in required_VIA_keys.items() + for val in values + ] + + # Run check + _check_required_properties_keys( + required_property_keys, + default_VIA_schema, + ) + + +@pytest.mark.parametrize( + "input_file,", + [ + "COCO_JSON_sample_1.json", + "COCO_JSON_sample_2.json", + ], +) +def test_required_keys_in_COCO_schema( + input_file: str, default_COCO_schema: dict, annotations_test_data: dict +): + """Check the provided COCO schema contains the ValidCOCO required keys.""" + # Get required keys from a COCO valid file + filepath = annotations_test_data[input_file] + valid_coco = ValidCOCO(path=filepath) + required_COCO_keys = valid_coco.required_keys + + # Prepare list of required "properties" keys with full paths 
+ required_properties_keys = [ + f"{level}/{ky}" if level != "main" else ky + for level, required_keys in required_COCO_keys.items() + for ky in required_keys + ] + + # Run check + _check_required_properties_keys( + required_properties_keys, + default_COCO_schema, + ) From 02e714c8efa87d1142157becf703e01a10147100 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 21 Jan 2025 18:43:35 +0000 Subject: [PATCH 09/14] Combine validators tests --- .../test_annotations/test_validators.py | 89 +++++++------------ 1 file changed, 30 insertions(+), 59 deletions(-) diff --git a/tests/test_unit/test_annotations/test_validators.py b/tests/test_unit/test_annotations/test_validators.py index 8fb697d..c289153 100644 --- a/tests/test_unit/test_annotations/test_validators.py +++ b/tests/test_unit/test_annotations/test_validators.py @@ -126,114 +126,85 @@ def default_COCO_schema() -> dict: @pytest.mark.parametrize( - "input_file,", + "input_file, validator", [ - "VIA_JSON_sample_1.json", - "VIA_JSON_sample_2.json", + ("VIA_JSON_sample_1.json", ValidVIA), + ("VIA_JSON_sample_2.json", ValidVIA), + ("COCO_JSON_sample_1.json", ValidCOCO), + ("COCO_JSON_sample_2.json", ValidCOCO), ], ) -def test_valid_VIA(input_file: str, annotations_test_data: dict): - """Test the VIA validator with valid inputs.""" +def test_validators_valid_input_files( + input_file: str, + validator: type[ValidVIA | ValidCOCO], + annotations_test_data: dict, +): + """Test the file validator with valid inputs.""" filepath = annotations_test_data[input_file] with does_not_raise(): - ValidVIA(path=filepath) + validator(path=filepath) @pytest.mark.parametrize( - "invalid_input_file, expected_exception, log_message", + "invalid_input_file, validator, expected_exception, log_message", [ ( "json_file_decode_error", + ValidVIA, pytest.raises(ValueError), "Error decoding JSON data from file", ), ( "json_file_not_found_error", + ValidVIA, pytest.raises(FileNotFoundError), - "File not 
found", - ), - ( - "VIA_file_schema_mismatch", - pytest.raises(jsonschema.exceptions.ValidationError), - "'49' is not of type 'integer'", + "No such file or directory: ", ), - ], -) -def test_valid_VIA_invalid_files( - invalid_input_file: str, - expected_exception: pytest.raises, - log_message: str, - request: pytest.FixtureRequest, -): - """Test the VIA validator throwS the expected errors when passed invalid - inputs. - """ - invalid_json_file = request.getfixturevalue(invalid_input_file) - - with expected_exception as excinfo: - ValidVIA(path=invalid_json_file) - - # Check that the error message contains expected string - assert log_message in str(excinfo.value) - - # Check the error message contains file path - if not isinstance(excinfo.value, jsonschema.exceptions.ValidationError): - assert invalid_json_file.name in str(excinfo.value) - - -@pytest.mark.parametrize( - "input_file", - [ - "COCO_JSON_sample_1.json", - "COCO_JSON_sample_2.json", - ], -) -def test_valid_COCO(input_file: str, annotations_test_data: dict): - """Test the COCO validator with valid inputs.""" - filepath = annotations_test_data[input_file] - with does_not_raise(): - ValidCOCO(path=filepath) - - -@pytest.mark.parametrize( - "invalid_input_file, expected_exception, log_message", - [ ( "json_file_decode_error", + ValidCOCO, pytest.raises(ValueError), "Error decoding JSON data from file", ), ( "json_file_not_found_error", + ValidCOCO, pytest.raises(FileNotFoundError), - "File not found", + "No such file or directory: ", + ), + ( + "VIA_file_schema_mismatch", + ValidVIA, + pytest.raises(jsonschema.exceptions.ValidationError), + "'49' is not of type 'integer'", ), ( "COCO_file_schema_mismatch", + ValidCOCO, pytest.raises(jsonschema.exceptions.ValidationError), "3 is not of type 'object'", ), ], ) -def test_valid_COCO_invalid_files( +def test_validators_invalid_input_files( invalid_input_file: str, + validator: type[ValidVIA | ValidCOCO], expected_exception: pytest.raises, log_message: str, 
request: pytest.FixtureRequest, ): - """Test the COCO validator throws the expected errors when passed invalid + """Test the validators throw the expected errors when passed invalid inputs. """ invalid_json_file = request.getfixturevalue(invalid_input_file) with expected_exception as excinfo: - ValidCOCO(path=invalid_json_file) + validator(path=invalid_json_file) # Check that the error message contains expected string assert log_message in str(excinfo.value) # Check the error message contains file path - # assert invalid_json_file.name in str(excinfo.value) if not isinstance(excinfo.value, jsonschema.exceptions.ValidationError): assert invalid_json_file.name in str(excinfo.value) From 91addb1a7a01b51916df9c0c1bae7e17270d4bf4 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 21 Jan 2025 18:44:29 +0000 Subject: [PATCH 10/14] Simplify JSON check --- ethology/annotations/json_schemas/utils.py | 30 +++++++++++----------- ethology/annotations/validators.py | 2 ++ 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/ethology/annotations/json_schemas/utils.py b/ethology/annotations/json_schemas/utils.py index 04c2c90..0d87d50 100644 --- a/ethology/annotations/json_schemas/utils.py +++ b/ethology/annotations/json_schemas/utils.py @@ -8,7 +8,7 @@ def _get_default_VIA_schema() -> dict: - """Read a VIA schema file.""" + """Get the VIA schema as a dictionary.""" via_schema_path = Path(__file__).parent / "schemas" / "via_schema.json" with open(via_schema_path) as file: via_schema_dict = json.load(file) @@ -16,7 +16,7 @@ def _get_default_VIA_schema() -> dict: def _get_default_COCO_schema() -> dict: - """Read a COCO schema file.""" + """Get the COCO schema file as a dictionary.""" coco_schema_path = Path(__file__).parent / "schemas" / "coco_schema.json" with open(coco_schema_path) as file: coco_schema_dict = json.load(file) @@ -24,32 +24,32 @@ def _get_default_COCO_schema() -> dict: def _check_file_is_json(filepath: Path): - 
"""Ensure that the file is a JSON file.""" + """Check the input file can be read as a JSON.""" try: with open(filepath) as file: json.load(file) - except FileNotFoundError as not_found_error: - raise FileNotFoundError( - f"File not found: {filepath}." - ) from not_found_error except json.JSONDecodeError as decode_error: + # We override the error message for clarity raise ValueError( - f"Error decoding JSON data from file: {filepath}." + f"Error decoding JSON data from file: {filepath}. " + "The data being deserialized is not a valid JSON. " ) from decode_error + except Exception as error: + raise error -def _check_file_matches_schema(filepath: Path, schema: dict): - """Ensure that the JSON file matches the expected schema. +def _check_file_matches_schema(filepath: Path, schema: dict | None): + """Ensure that the input JSON file matches the given schema. The schema validation only checks the type for each specified key if the key exists. It does not check that the keys in the schema are present in the JSON file. 
""" - # read json file + # Read json file with open(filepath) as file: data = json.load(file) - # check against schema if provided + # Check against schema if provided if schema: try: jsonschema.validate(instance=data, schema=schema) @@ -62,7 +62,7 @@ def _check_file_matches_schema(filepath: Path, schema: dict): def _check_required_properties_keys( required_properties_keys: list, schema: dict ): - """Ensure that the input schema includes the required "properties" keys.""" + """Ensure the input schema includes the required "properties" keys.""" # Get keys of "properties" dictionaries in schema properties_keys_in_schema = _extract_properties_keys(schema) @@ -87,8 +87,8 @@ def _check_required_keys_in_dict( data: dict, additional_message: str = "", ): - """Check if the required keys are present in the input data_dict.""" - missing_keys = set(list_required_keys) - data.keys() + """Check if the required keys are present in the input dictionary.""" + missing_keys = set(list_required_keys) - set(data.keys()) if missing_keys: raise ValueError( f"Required key(s) {sorted(missing_keys)} not " diff --git a/ethology/annotations/validators.py b/ethology/annotations/validators.py index c654b99..8c912e2 100644 --- a/ethology/annotations/validators.py +++ b/ethology/annotations/validators.py @@ -58,6 +58,7 @@ class ValidVIA: init=False, ) + # Note: the validators are applied in order @path.validator def _file_is_json(self, attribute, value): _check_file_is_json(value) @@ -146,6 +147,7 @@ class ValidCOCO: init=False, ) + # Note: the validators are applied in order @path.validator def _file_is_json(self, attribute, value): _check_file_is_json(value) From f448c57830f879e3bede2f11606c001623b5f8c8 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 21 Jan 2025 19:04:15 +0000 Subject: [PATCH 11/14] Delete placeholder --- tests/test_unit/test_annotations/test_placeholder.py | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 
tests/test_unit/test_annotations/test_placeholder.py diff --git a/tests/test_unit/test_annotations/test_placeholder.py b/tests/test_unit/test_annotations/test_placeholder.py deleted file mode 100644 index 3ada1ee..0000000 --- a/tests/test_unit/test_annotations/test_placeholder.py +++ /dev/null @@ -1,2 +0,0 @@ -def test_placeholder(): - assert True From e01b993a8c9f86fb064689b700eed78b012d2b61 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 21 Jan 2025 19:04:24 +0000 Subject: [PATCH 12/14] Rename schemas --- .../schemas/{coco_schema.json => COCO_schema.json} | 0 .../json_schemas/schemas/{via_schema.json => VIA_schema.json} | 0 ethology/annotations/json_schemas/utils.py | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) rename ethology/annotations/json_schemas/schemas/{coco_schema.json => COCO_schema.json} (100%) rename ethology/annotations/json_schemas/schemas/{via_schema.json => VIA_schema.json} (100%) diff --git a/ethology/annotations/json_schemas/schemas/coco_schema.json b/ethology/annotations/json_schemas/schemas/COCO_schema.json similarity index 100% rename from ethology/annotations/json_schemas/schemas/coco_schema.json rename to ethology/annotations/json_schemas/schemas/COCO_schema.json diff --git a/ethology/annotations/json_schemas/schemas/via_schema.json b/ethology/annotations/json_schemas/schemas/VIA_schema.json similarity index 100% rename from ethology/annotations/json_schemas/schemas/via_schema.json rename to ethology/annotations/json_schemas/schemas/VIA_schema.json diff --git a/ethology/annotations/json_schemas/utils.py b/ethology/annotations/json_schemas/utils.py index 0d87d50..8072be3 100644 --- a/ethology/annotations/json_schemas/utils.py +++ b/ethology/annotations/json_schemas/utils.py @@ -9,7 +9,7 @@ def _get_default_VIA_schema() -> dict: """Get the VIA schema as a dictionary.""" - via_schema_path = Path(__file__).parent / "schemas" / "via_schema.json" + via_schema_path = Path(__file__).parent / 
"schemas" / "VIA_schema.json" with open(via_schema_path) as file: via_schema_dict = json.load(file) return via_schema_dict @@ -17,7 +17,7 @@ def _get_default_VIA_schema() -> dict: def _get_default_COCO_schema() -> dict: """Get the COCO schema file as a dictionary.""" - coco_schema_path = Path(__file__).parent / "schemas" / "coco_schema.json" + coco_schema_path = Path(__file__).parent / "schemas" / "COCO_schema.json" with open(coco_schema_path) as file: coco_schema_dict = json.load(file) return coco_schema_dict From 534d32b1196acd80dd62b69b809a5adb24e83009 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 21 Jan 2025 19:07:06 +0000 Subject: [PATCH 13/14] Small edits caps --- .../test_annotations/test_validators.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_unit/test_annotations/test_validators.py b/tests/test_unit/test_annotations/test_validators.py index c289153..8d3fd1d 100644 --- a/tests/test_unit/test_annotations/test_validators.py +++ b/tests/test_unit/test_annotations/test_validators.py @@ -39,8 +39,8 @@ def VIA_file_schema_mismatch( in the first image, from "int" to "str" """ # Read valid JSON file - valid_via_file_sample_1 = annotations_test_data["VIA_JSON_sample_1.json"] - with open(valid_via_file_sample_1) as f: + valid_VIA_file_sample_1 = annotations_test_data["VIA_JSON_sample_1.json"] + with open(valid_VIA_file_sample_1) as f: data = json.load(f) # Modify file so that it doesn't match the corresponding schema @@ -49,7 +49,7 @@ def VIA_file_schema_mismatch( img_dict["regions"][0]["shape_attributes"]["width"] = "49" # Save the modified JSON to a new file - out_json = tmp_path / f"{valid_via_file_sample_1.stem}_schema_error.json" + out_json = tmp_path / f"{valid_VIA_file_sample_1.stem}_schema_error.json" with open(out_json, "w") as f: json.dump(data, f) return out_json @@ -66,15 +66,15 @@ def COCO_file_schema_mismatch( key from "list of dicts" to "list" """ # 
Read valid JSON file - valid_coco_file_sample_1 = annotations_test_data["COCO_JSON_sample_1.json"] - with open(valid_coco_file_sample_1) as f: + valid_COCO_file_sample_1 = annotations_test_data["COCO_JSON_sample_1.json"] + with open(valid_COCO_file_sample_1) as f: data = json.load(f) # Modify file so that it doesn't match the corresponding schema data["annotations"] = [1, 2, 3] # [d] for d in data["annotations"]] # save the modified json to a new file - out_json = tmp_path / f"{valid_coco_file_sample_1.stem}_schema_error.json" + out_json = tmp_path / f"{valid_COCO_file_sample_1.stem}_schema_error.json" with open(out_json, "w") as f: json.dump(data, f) return out_json @@ -353,8 +353,8 @@ def test_required_keys_in_VIA_schema( """Check the provided VIA schema contains the ValidVIA required keys.""" # Get required keys from a VIA valid file filepath = annotations_test_data[input_file] - valid_via = ValidVIA(path=filepath) - required_VIA_keys = valid_via.required_keys + valid_VIA = ValidVIA(path=filepath) + required_VIA_keys = valid_VIA.required_keys # Map required keys to "properties" keys in schema map_required_to_properties_keys = { @@ -391,8 +391,8 @@ def test_required_keys_in_COCO_schema( """Check the provided COCO schema contains the ValidCOCO required keys.""" # Get required keys from a COCO valid file filepath = annotations_test_data[input_file] - valid_coco = ValidCOCO(path=filepath) - required_COCO_keys = valid_coco.required_keys + valid_COCO = ValidCOCO(path=filepath) + required_COCO_keys = valid_COCO.required_keys # Prepare list of required "properties" keys with full paths required_properties_keys = [ From 272d6178dfb39ab8d89db37934012391404b07a3 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 21 Jan 2025 19:12:45 +0000 Subject: [PATCH 14/14] Update docstrings --- ethology/annotations/json_schemas/utils.py | 6 ++-- ethology/annotations/validators.py | 35 +++++++++++++--------- 2 files changed, 24 insertions(+), 17 
deletions(-) diff --git a/ethology/annotations/json_schemas/utils.py b/ethology/annotations/json_schemas/utils.py index 8072be3..2f92dbd 100644 --- a/ethology/annotations/json_schemas/utils.py +++ b/ethology/annotations/json_schemas/utils.py @@ -16,7 +16,7 @@ def _get_default_VIA_schema() -> dict: def _get_default_COCO_schema() -> dict: - """Get the COCO schema file as a dictionary.""" + """Get the COCO schema as a dictionary.""" coco_schema_path = Path(__file__).parent / "schemas" / "COCO_schema.json" with open(coco_schema_path) as file: coco_schema_dict = json.load(file) @@ -39,7 +39,7 @@ def _check_file_is_json(filepath: Path): def _check_file_matches_schema(filepath: Path, schema: dict | None): - """Ensure that the input JSON file matches the given schema. + """Check the input JSON file matches the given schema. The schema validation only checks the type for each specified key if the key exists. It does not check that the keys in the @@ -62,7 +62,7 @@ def _check_file_matches_schema(filepath: Path, schema: dict | None): def _check_required_properties_keys( required_properties_keys: list, schema: dict ): - """Ensure the input schema includes the required "properties" keys.""" + """Check the input schema includes the required "properties" keys.""" # Get keys of "properties" dictionaries in schema properties_keys_in_schema = _extract_properties_keys(schema) diff --git a/ethology/annotations/validators.py b/ethology/annotations/validators.py index 8c912e2..bf5efae 100644 --- a/ethology/annotations/validators.py +++ b/ethology/annotations/validators.py @@ -18,28 +18,30 @@ class ValidVIA: """Class for valid VIA JSON files. - It checks the input file is a `ValidJSON` and additionally checks the - file contains the required keys. + It checks the input file is a valid JSON file, matches + the VIA schema and contains the required keys. + Attributes ---------- path : pathlib.Path - Path to the VIA JSON file. - + Path to the VIA JSON file, passed as an input. 
schema : dict - The JSON schema is set to VIA_SCHEMA. + The JSON schema is set to the default VIA schema. + required_keys : dict + The required keys for the VIA JSON file. Raises ------ - FileNotFoundError - If the file does not exist. ValueError If the JSON file cannot be decoded. jsonschema.exceptions.ValidationError If the type of any of the keys in the JSON file does not match the type specified in the schema. + jsonschema.exceptions.SchemaError + If the schema is invalid. ValueError - If the VIA JSON file misses any of the required keys. + If the VIA JSON file is missing any of the required keys. """ @@ -105,26 +107,31 @@ def _file_contains_required_keys(self, attribute, value): @define class ValidCOCO: - """Class valid COCO JSON files for untracked data. + """Class for valid COCO JSON files. - It checks the input COCO JSON file contains the required keys. + It checks the input file is a valid JSON file, matches + the COCO schema and contains the required keys. Attributes ---------- path : pathlib.Path - Path to the COCO JSON file. + Path to the COCO JSON file, passed as an input. + schema : dict + The JSON schema is set to the default COCO schema. + required_keys : dict + The required keys for the COCO JSON file. Raises ------ - FileNotFoundError - If the file does not exist. ValueError If the JSON file cannot be decoded. jsonschema.exceptions.ValidationError If the type of any of the keys in the JSON file does not match the type specified in the schema. + jsonschema.exceptions.SchemaError + If the schema is invalid. ValueError - If the COCO JSON file misses any of the required keys. + If the COCO JSON file is missing any of the required keys. """