From 74ca716bf1795ba97d7194278da60980f1f2d3f9 Mon Sep 17 00:00:00 2001 From: Ilya Trushkin Date: Sat, 22 Jun 2024 00:50:34 +0300 Subject: [PATCH] Add hierarchical ImageNet-like dataset format (#1528) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Summary CVS-143460 ### How to test ### Checklist - [x] I have added unit tests to cover my changes.​ - [ ] I have added integration tests to cover my changes.​ - [ ] I have added the description of my changes into [CHANGELOG](https://github.com/openvinotoolkit/datumaro/blob/develop/CHANGELOG.md).​ - [ ] I have updated the [documentation](https://github.com/openvinotoolkit/datumaro/tree/develop/docs) accordingly ### License - [x] I submit _my code changes_ under the same [MIT License](https://github.com/openvinotoolkit/datumaro/blob/develop/LICENSE) that covers the project. Feel free to contact the maintainers if that's a concern. - [ ] I have updated the license header for each file (see an example below). 
```python # Copyright (C) 2024 Intel Corporation # # SPDX-License-Identifier: MIT ``` --------- Signed-off-by: Ilya Trushkin --- .../cli/commands/downloaders/kaggle.py | 2 +- src/datumaro/components/importer.py | 4 +- .../plugins/data_formats/image_dir.py | 28 +++- src/datumaro/plugins/data_formats/imagenet.py | 152 +++++++++++------- .../plugins/data_formats/yolo/exporter.py | 6 +- src/datumaro/plugins/specs.json | 10 +- src/datumaro/util/definitions.py | 1 + .../{label_0 => }/label_0_2.jpg | Bin .../label_1/label_1_1/label_1_1.jpg | Bin 0 -> 631 bytes .../test/{label_0 => }/label_0_2.jpg | Bin .../test/label_1/label_1_1/label_1_1.jpg | Bin 0 -> 631 bytes .../train/{label_0 => }/label_0_2.jpg | Bin .../train/label_1/label_1_1/label_1_1.jpg | Bin 0 -> 631 bytes .../val/{label_0 => }/label_0_2.jpg | Bin .../val/label_1/label_1_1/label_1_1.jpg | Bin 0 -> 631 bytes tests/unit/test_image_dir_format.py | 23 +-- tests/unit/test_imagenet_format.py | 13 +- 17 files changed, 133 insertions(+), 106 deletions(-) rename tests/assets/imagenet_dataset/{label_0 => }/label_0_2.jpg (100%) create mode 100644 tests/assets/imagenet_dataset/label_1/label_1_1/label_1_1.jpg rename tests/assets/imagenet_subsets_dataset/test/{label_0 => }/label_0_2.jpg (100%) create mode 100644 tests/assets/imagenet_subsets_dataset/test/label_1/label_1_1/label_1_1.jpg rename tests/assets/imagenet_subsets_dataset/train/{label_0 => }/label_0_2.jpg (100%) create mode 100644 tests/assets/imagenet_subsets_dataset/train/label_1/label_1_1/label_1_1.jpg rename tests/assets/imagenet_subsets_dataset/val/{label_0 => }/label_0_2.jpg (100%) create mode 100644 tests/assets/imagenet_subsets_dataset/val/label_1/label_1_1/label_1_1.jpg diff --git a/src/datumaro/cli/commands/downloaders/kaggle.py b/src/datumaro/cli/commands/downloaders/kaggle.py index 917ca288d5..b7127a2d50 100644 --- a/src/datumaro/cli/commands/downloaders/kaggle.py +++ b/src/datumaro/cli/commands/downloaders/kaggle.py @@ -30,7 +30,7 @@ def 
make_all_paths_absolute(d: Dict, root: str = "."): KAGGLE_API_KEY_EXISTS = bool(os.environ.get("KAGGLE_KEY")) or os.path.exists( - os.path.join(os.path.expanduser("~"), ".kaggle") + os.path.join(os.path.expanduser("~"), ".kaggle", "kaggle.json") ) diff --git a/src/datumaro/components/importer.py b/src/datumaro/components/importer.py index 128bf47eb3..886e6e1129 100644 --- a/src/datumaro/components/importer.py +++ b/src/datumaro/components/importer.py @@ -22,7 +22,7 @@ from datumaro.components.errors import DatasetImportError, DatasetNotFoundError from datumaro.components.format_detection import FormatDetectionConfidence, FormatDetectionContext from datumaro.components.merge.extractor_merger import ExtractorMerger -from datumaro.util.definitions import SUBSET_NAME_BLACKLIST +from datumaro.util.definitions import SUBSET_NAME_WHITELIST T = TypeVar("T") @@ -197,7 +197,7 @@ def _change_context_root_path(context: FormatDetectionContext, path: str): ) for sub_dir in os.listdir(path): - if sub_dir.lower() in SUBSET_NAME_BLACKLIST: + if sub_dir.lower() not in SUBSET_NAME_WHITELIST: continue sub_path = osp.join(path, sub_dir) diff --git a/src/datumaro/plugins/data_formats/image_dir.py b/src/datumaro/plugins/data_formats/image_dir.py index 04d1987198..e3a5dc1507 100644 --- a/src/datumaro/plugins/data_formats/image_dir.py +++ b/src/datumaro/plugins/data_formats/image_dir.py @@ -4,12 +4,12 @@ import logging as log import os -import os.path as osp +from pathlib import Path from typing import List, Optional from datumaro.components.dataset_base import DatasetItem, SubsetBase from datumaro.components.exporter import Exporter -from datumaro.components.format_detection import FormatDetectionConfidence +from datumaro.components.format_detection import FormatDetectionConfidence, FormatDetectionContext from datumaro.components.importer import ImportContext, Importer from datumaro.components.media import Image from datumaro.util.image import IMAGE_EXTENSIONS, find_images @@ -31,11 
+31,23 @@ def build_cmdline_parser(cls, **kwargs): ) return parser + @classmethod + def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence: + path = Path(context.root_path) + for item in path.iterdir(): + if item.is_dir(): + context.fail("Only flat image directories are supported") + elif item.suffix.lower() not in IMAGE_EXTENSIONS: + context.fail(f"File {item} is not an image.") + return super().detect(context) + @classmethod def find_sources(cls, path): - if not osp.isdir(path): + path = Path(path) + if not path.is_dir(): return [] - return [{"url": path, "format": ImageDirBase.NAME}] + + return [{"url": str(path), "format": ImageDirBase.NAME}] @classmethod def get_file_extensions(cls) -> List[str]: @@ -51,11 +63,11 @@ def __init__( ctx: Optional[ImportContext] = None, ): super().__init__(subset=subset, ctx=ctx) + url = Path(url) + assert url.is_dir(), url - assert osp.isdir(url), url - - for path in find_images(url, recursive=True): - item_id = osp.relpath(osp.splitext(path)[0], url) + for path in find_images(str(url)): + item_id = Path(path).stem self._items.append( DatasetItem(id=item_id, subset=self._subset, media=Image.from_file(path=path)) ) diff --git a/src/datumaro/plugins/data_formats/imagenet.py b/src/datumaro/plugins/data_formats/imagenet.py index 10dea16d0a..032c8eb9f4 100644 --- a/src/datumaro/plugins/data_formats/imagenet.py +++ b/src/datumaro/plugins/data_formats/imagenet.py @@ -5,9 +5,8 @@ import errno import logging as log import os -import os.path as osp -import warnings -from typing import List, Optional +from pathlib import Path +from typing import List from datumaro.components.annotation import AnnotationType, Label, LabelCategories from datumaro.components.dataset_base import DatasetItem, SubsetBase @@ -16,8 +15,9 @@ from datumaro.components.format_detection import FormatDetectionConfidence, FormatDetectionContext from datumaro.components.importer import ImportContext, Importer, with_subset_dirs from 
datumaro.components.media import Image -from datumaro.util.definitions import SUBSET_NAME_BLACKLIST +from datumaro.util.definitions import SUBSET_NAME_BLACKLIST, SUBSET_NAME_WHITELIST from datumaro.util.image import IMAGE_EXTENSIONS, find_images +from datumaro.util.os_util import walk class ImagenetPath: @@ -30,40 +30,39 @@ def __init__( self, path: str, *, - subset: Optional[str] = None, - ctx: Optional[ImportContext] = None, + subset: str | None = None, + ctx: ImportContext | None = None, + min_depth: int | None = None, + max_depth: int | None = None, ): - if not osp.isdir(path): + if not Path(path).is_dir(): raise NotADirectoryError(errno.ENOTDIR, "Can't find dataset directory", path) - super().__init__(subset=subset, ctx=ctx) - + self._max_depth = max_depth + self._min_depth = min_depth self._categories = self._load_categories(path) self._items = list(self._load_items(path).values()) def _load_categories(self, path): label_cat = LabelCategories() - for dirname in sorted(os.listdir(path)): - if not os.path.isdir(os.path.join(path, dirname)): - warnings.warn( - f"{dirname} is not a directory in the folder {path}, so this will" - "be skipped when declaring the cateogries of `imagenet` dataset." - ) - continue - if dirname != ImagenetPath.IMAGE_DIR_NO_LABEL: - label_cat.add(dirname) + path = Path(path) + for dirname in sorted(d for d in path.rglob("*") if d.is_dir()): + dirname = dirname.relative_to(path) + if str(dirname) != ImagenetPath.IMAGE_DIR_NO_LABEL: + label_cat.add(str(dirname)) return {AnnotationType.label: label_cat} def _load_items(self, path): items = {} - # Images should be in root/label_dir/*.img and root/*.img is not allowed. 
- # => max_depth=1, min_depth=1 - for image_path in find_images(path, recursive=True, max_depth=1, min_depth=1): - label = osp.basename(osp.dirname(image_path)) - image_name = osp.splitext(osp.basename(image_path))[0] - - item_id = label + ImagenetPath.SEP_TOKEN + image_name + for image_path in find_images( + path, recursive=True, max_depth=self._max_depth, min_depth=self._min_depth + ): + label = str(Path(image_path).parent.relative_to(path)) + if label == ".": # image is located in the root directory + label = ImagenetPath.IMAGE_DIR_NO_LABEL + image_name = Path(image_path).stem + item_id = str(label) + ImagenetPath.SEP_TOKEN + image_name item = items.get(item_id) try: if item is None: @@ -89,45 +88,70 @@ def _load_items(self, path): class ImagenetImporter(Importer): - """TorchVision's ImageFolder style importer. - For example, it imports the following directory structure. + """ + Multi-level (hierarchical) ImageNet-like importer. + For example, it imports the following directory structure. .. code-block:: text root ├── label_0 - │ ├── label_0_1.jpg - │ └── label_0_2.jpg + │ ├── label_0_1 + │ │ └── img1.jpg + │ └── label_0_2 + │ └── img2.jpg └── label_1 - └── label_1_1.jpg + └── img3.jpg """ + _MIN_DEPTH = None + _MAX_DEPTH = None + _FORMAT = ImagenetBase.NAME + DETECT_CONFIDENCE = FormatDetectionConfidence.EXTREME_LOW + @classmethod def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence: # Images must not be under a directory whose name is blacklisted. - for dname in os.listdir(context.root_path): - dpath = osp.join(context.root_path, dname) - if osp.isdir(dpath) and dname.lower() in SUBSET_NAME_BLACKLIST: + for dname, dirnames, filenames in os.walk(context.root_path): + if dname in SUBSET_NAME_WHITELIST: context.fail( - f"{dname} is found in {context.root_path}. " - "However, Images must not be under a directory whose name is blacklisted " - f"(SUBSET_NAME_BLACKLIST={SUBSET_NAME_BLACKLIST})." 
+ f"Following directory names are not permitted: {SUBSET_NAME_WHITELIST}" ) + rel_dname = Path(dname).relative_to(context.root_path) + level = len(rel_dname.parts) + if cls._MIN_DEPTH is not None and level < cls._MIN_DEPTH and filenames: + context.fail("Found files out of the directory level bounds.") + if cls._MAX_DEPTH is not None and level > cls._MAX_DEPTH and filenames: + context.fail("Found files out of the directory level bounds.") + dpath = Path(context.root_path) / rel_dname + if dpath.is_dir(): + if str(rel_dname).lower() in SUBSET_NAME_BLACKLIST: + context.fail( + f"{dname} is found in {context.root_path}. " + "However, Images must not be under a directory whose name is blacklisted " + f"(SUBSET_NAME_BLACKLIST={SUBSET_NAME_BLACKLIST})." + ) return super().detect(context) + @classmethod + def contains_only_images(cls, path: str | Path): + for _, dirnames, filenames in walk(path, cls._MAX_DEPTH, cls._MIN_DEPTH): + if filenames: + for filename in filenames: + if Path(filename).suffix.lower() not in IMAGE_EXTENSIONS: + return False + elif not dirnames: + return False + return True + @classmethod def find_sources(cls, path): - if not osp.isdir(path): + if not Path(path).is_dir(): return [] - # Images should be in root/label_dir/*.img and root/*.img is not allowed. - # => max_depth=1, min_depth=1 - for _ in find_images(path, recursive=True, max_depth=1, min_depth=1): - return [{"url": path, "format": ImagenetBase.NAME}] - - return [] + return [{"url": path, "format": cls._FORMAT}] if cls.contains_only_images(path) else [] @classmethod def get_file_extensions(cls) -> List[str]: @@ -144,32 +168,36 @@ def build_cmdline_parser(cls, **kwargs): @with_subset_dirs class ImagenetWithSubsetDirsImporter(ImagenetImporter): - """TorchVision ImageFolder style importer. - For example, it imports the following directory structure. + """Multi-level image directory structure importer. + Example: .. 
code-block:: root ├── train │ ├── label_0 - │ │ ├── label_0_1.jpg - │ │ └── label_0_2.jpg + │ │ ├── label_0_1 + │ │ │ └── img1.jpg + │ │ └── label_0_2 + │ │ └── img2.jpg │ └── label_1 - │ └── label_1_1.jpg + │ └── img3.jpg ├── val │ ├── label_0 - │ │ ├── label_0_1.jpg - │ │ └── label_0_2.jpg + │ │ ├── label_0_1 + │ │ │ └── img1.jpg + │ │ └── label_0_2 + │ │ └── img2.jpg │ └── label_1 - │ └── label_1_1.jpg + │ └── img3.jpg └── test - ├── label_0 - │ ├── label_0_1.jpg - │ └── label_0_2.jpg + ├── label_0 + │ ├── label_0_1 + │ │ └── img1.jpg + │ └── label_0_2 + │ └── img2.jpg └── label_1 - └── label_1_1.jpg - - Then, it will have three subsets: train, val, and test and they have label_0 and label_1 labels. + └── img3.jpg """ @@ -199,7 +227,7 @@ def _get_name(item: DatasetItem) -> str: 'For example, dataset.export("", format="imagenet_with_subset_dirs").' ) - root_dir = self._save_dir + root_dir = Path(self._save_dir) extractor = self._extractor labels = {} for item in self._extractor: @@ -210,18 +238,18 @@ def _get_name(item: DatasetItem) -> str: label_name = extractor.categories()[AnnotationType.label][label].name self._save_image( item, - subdir=osp.join(root_dir, item.subset, label_name) + subdir=root_dir / item.subset / label_name if self.USE_SUBSET_DIRS - else osp.join(root_dir, label_name), + else root_dir / label_name, name=file_name, ) if not labels: self._save_image( item, - subdir=osp.join(root_dir, item.subset, ImagenetPath.IMAGE_DIR_NO_LABEL) + subdir=root_dir / item.subset / ImagenetPath.IMAGE_DIR_NO_LABEL if self.USE_SUBSET_DIRS - else osp.join(root_dir, ImagenetPath.IMAGE_DIR_NO_LABEL), + else root_dir / ImagenetPath.IMAGE_DIR_NO_LABEL, name=file_name, ) diff --git a/src/datumaro/plugins/data_formats/yolo/exporter.py b/src/datumaro/plugins/data_formats/yolo/exporter.py index 3cfbeb3994..e74989df62 100644 --- a/src/datumaro/plugins/data_formats/yolo/exporter.py +++ b/src/datumaro/plugins/data_formats/yolo/exporter.py @@ -15,6 +15,7 @@ from 
datumaro.components.exporter import Exporter from datumaro.components.media import Image from datumaro.util import str_to_bool +from datumaro.util.definitions import SUBSET_NAME_WHITELIST from .format import YoloPath @@ -195,7 +196,6 @@ def can_stream(self) -> bool: class YoloUltralyticsExporter(YoloExporter): - allowed_subset_names = {"train", "val", "test"} must_subset_names = {"train", "val"} def __init__(self, extractor: IDataset, save_dir: str, **kwargs) -> None: @@ -214,9 +214,9 @@ def _check_dataset(self): subset_names = set(self._extractor.subsets().keys()) for subset in subset_names: - if subset not in self.allowed_subset_names: + if subset not in SUBSET_NAME_WHITELIST: raise DatasetExportError( - f"The allowed subset name is in {self.allowed_subset_names}, " + f"The allowed subset name should be in {SUBSET_NAME_WHITELIST}, " f'so that subset "{subset}" is not allowed.' ) diff --git a/src/datumaro/plugins/specs.json b/src/datumaro/plugins/specs.json index c8afe2e4bc..8891b79287 100644 --- a/src/datumaro/plugins/specs.json +++ b/src/datumaro/plugins/specs.json @@ -1834,6 +1834,11 @@ "plugin_name": "anns_to_labels", "plugin_type": "Transform" }, + { + "import_path": "datumaro.plugins.transforms.AstypeAnnotations", + "plugin_name": "astype_annotations", + "plugin_type": "Transform" + }, { "import_path": "datumaro.plugins.transforms.BboxValuesDecrement", "plugin_name": "bbox_values_decrement", @@ -1954,11 +1959,6 @@ "plugin_name": "sort", "plugin_type": "Transform" }, - { - "import_path": "datumaro.plugins.transforms.AstypeAnnotations", - "plugin_name": "astype_annotations", - "plugin_type": "Transform" - }, { "import_path": "datumaro.plugins.validators.ClassificationValidator", "plugin_name": "classification", diff --git a/src/datumaro/util/definitions.py b/src/datumaro/util/definitions.py index c16db86807..9882ead8f5 100644 --- a/src/datumaro/util/definitions.py +++ b/src/datumaro/util/definitions.py @@ -10,6 +10,7 @@ DEFAULT_SUBSET_NAME = "default" 
BboxIntCoords = Tuple[int, int, int, int] # (x, y, w, h) SUBSET_NAME_BLACKLIST = {"labels", "images", "annotations", "instances"} +SUBSET_NAME_WHITELIST = {"train", "test", "val"} def get_datumaro_cache_dir( diff --git a/tests/assets/imagenet_dataset/label_0/label_0_2.jpg b/tests/assets/imagenet_dataset/label_0_2.jpg similarity index 100% rename from tests/assets/imagenet_dataset/label_0/label_0_2.jpg rename to tests/assets/imagenet_dataset/label_0_2.jpg diff --git a/tests/assets/imagenet_dataset/label_1/label_1_1/label_1_1.jpg b/tests/assets/imagenet_dataset/label_1/label_1_1/label_1_1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9d28e0c15e09d6a84d9adf911075171c481c09ac GIT binary patch literal 631 zcmex=^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<gTWM0TY@5u?V53ptdXHXalWy7)oGIH{I3zSIJR&kGIVCkMJtH%#xTLhKyrQzI zxuvzOy`!^h(&Q;qr%j(RbJn88OO`HMzGCI7O`ErD-L`$l&RvHNA31vL_=%IJE?vHI z_1g6tH*YuS~;l_iU%Emz-M3agxa*3&!JXHM%@*3D@ k#CfcVET6$WhVa)d1|DWcVB|3iGT1YG;L=#sVE_Ln0Q-o|ng9R* literal 0 HcmV?d00001 diff --git a/tests/assets/imagenet_subsets_dataset/test/label_0/label_0_2.jpg b/tests/assets/imagenet_subsets_dataset/test/label_0_2.jpg similarity index 100% rename from tests/assets/imagenet_subsets_dataset/test/label_0/label_0_2.jpg rename to tests/assets/imagenet_subsets_dataset/test/label_0_2.jpg diff --git a/tests/assets/imagenet_subsets_dataset/test/label_1/label_1_1/label_1_1.jpg b/tests/assets/imagenet_subsets_dataset/test/label_1/label_1_1/label_1_1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9d28e0c15e09d6a84d9adf911075171c481c09ac GIT binary patch literal 631 zcmex=^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<gTWM0TY@5u?V53ptdXHXalWy7)oGIH{I3zSIJR&kGIVCkMJtH%#xTLhKyrQzI zxuvzOy`!^h(&Q;qr%j(RbJn88OO`HMzGCI7O`ErD-L`$l&RvHNA31vL_=%IJE?vHI z_1g6tH*YuS~;l_iU%Emz-M3agxa*3&!JXHM%@*3D@ k#CfcVET6$WhVa)d1|DWcVB|3iGT1YG;L=#sVE_Ln0Q-o|ng9R* literal 0 HcmV?d00001 diff --git a/tests/assets/imagenet_subsets_dataset/train/label_0/label_0_2.jpg 
b/tests/assets/imagenet_subsets_dataset/train/label_0_2.jpg similarity index 100% rename from tests/assets/imagenet_subsets_dataset/train/label_0/label_0_2.jpg rename to tests/assets/imagenet_subsets_dataset/train/label_0_2.jpg diff --git a/tests/assets/imagenet_subsets_dataset/train/label_1/label_1_1/label_1_1.jpg b/tests/assets/imagenet_subsets_dataset/train/label_1/label_1_1/label_1_1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9d28e0c15e09d6a84d9adf911075171c481c09ac GIT binary patch literal 631 zcmex=^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<gTWM0TY@5u?V53ptdXHXalWy7)oGIH{I3zSIJR&kGIVCkMJtH%#xTLhKyrQzI zxuvzOy`!^h(&Q;qr%j(RbJn88OO`HMzGCI7O`ErD-L`$l&RvHNA31vL_=%IJE?vHI z_1g6tH*YuS~;l_iU%Emz-M3agxa*3&!JXHM%@*3D@ k#CfcVET6$WhVa)d1|DWcVB|3iGT1YG;L=#sVE_Ln0Q-o|ng9R* literal 0 HcmV?d00001 diff --git a/tests/assets/imagenet_subsets_dataset/val/label_0/label_0_2.jpg b/tests/assets/imagenet_subsets_dataset/val/label_0_2.jpg similarity index 100% rename from tests/assets/imagenet_subsets_dataset/val/label_0/label_0_2.jpg rename to tests/assets/imagenet_subsets_dataset/val/label_0_2.jpg diff --git a/tests/assets/imagenet_subsets_dataset/val/label_1/label_1_1/label_1_1.jpg b/tests/assets/imagenet_subsets_dataset/val/label_1/label_1_1/label_1_1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9d28e0c15e09d6a84d9adf911075171c481c09ac GIT binary patch literal 631 zcmex=^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<gTWM0TY@5u?V53ptdXHXalWy7)oGIH{I3zSIJR&kGIVCkMJtH%#xTLhKyrQzI zxuvzOy`!^h(&Q;qr%j(RbJn88OO`HMzGCI7O`ErD-L`$l&RvHNA31vL_=%IJE?vHI z_1g6tH*YuS~;l_iU%Emz-M3agxa*3&!JXHM%@*3D@ k#CfcVET6$WhVa)d1|DWcVB|3iGT1YG;L=#sVE_Ln0Q-o|ng9R* literal 0 HcmV?d00001 diff --git a/tests/unit/test_image_dir_format.py b/tests/unit/test_image_dir_format.py index ddbd0e5414..7389ebab36 100644 --- a/tests/unit/test_image_dir_format.py +++ b/tests/unit/test_image_dir_format.py @@ -32,21 +32,6 @@ def test_can_load(self): require_media=True, ) - 
@mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_relative_paths(self): - dataset = Dataset.from_iterable( - [ - DatasetItem(id="1", media=Image.from_numpy(data=np.ones((4, 2, 3)))), - DatasetItem(id="subdir1/1", media=Image.from_numpy(data=np.ones((2, 6, 3)))), - DatasetItem(id="subdir2/1", media=Image.from_numpy(data=np.ones((5, 4, 3)))), - ] - ) - - with TestDir() as test_dir: - check_save_and_load( - self, dataset, ImageDirExporter.convert, test_dir, importer="image_dir" - ) - @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self): dataset = Dataset.from_iterable( @@ -66,12 +51,8 @@ def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self): def test_can_save_and_load_image_with_arbitrary_extension(self): dataset = Dataset.from_iterable( [ - DatasetItem( - id="q/1", media=Image.from_numpy(data=np.zeros((4, 3, 3)), ext=".JPEG") - ), - DatasetItem( - id="a/b/c/2", media=Image.from_numpy(data=np.zeros((3, 4, 3)), ext=".bmp") - ), + DatasetItem(id="1", media=Image.from_numpy(data=np.zeros((4, 3, 3)), ext=".JPEG")), + DatasetItem(id="2", media=Image.from_numpy(data=np.zeros((3, 4, 3)), ext=".bmp")), ] ) diff --git a/tests/unit/test_imagenet_format.py b/tests/unit/test_imagenet_format.py index dd4e6d009e..e650a5c7d2 100644 --- a/tests/unit/test_imagenet_format.py +++ b/tests/unit/test_imagenet_format.py @@ -1,5 +1,6 @@ import pickle # nosec B403 from copy import deepcopy +from pathlib import Path from unittest import TestCase import numpy as np @@ -189,9 +190,13 @@ def _create_expected_dataset(self): annotations=[Label(0)], ), DatasetItem( - id="label_0:label_0_2", + id="no_label:label_0_2", media=Image.from_numpy(data=np.ones((10, 10, 3))), - annotations=[Label(0)], + ), + DatasetItem( + id=f"{Path('label_1', 'label_1_1')}:label_1_1", + media=Image.from_numpy(data=np.ones((8, 8, 3))), + annotations=[Label(2)], ), DatasetItem( id="label_1:label_1_1", @@ -201,7 +206,7 @@ def 
_create_expected_dataset(self): ], categories={ AnnotationType.label: LabelCategories.from_iterable( - "label_" + str(label) for label in range(2) + ("label_0", "label_1", f"{Path('label_1', 'label_1_1')}") ), }, ) @@ -234,7 +239,7 @@ class ImagenetWithSubsetDirsImporterTest(ImagenetImporterTest): @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_import(self): - dataset = Dataset.import_from(self.DUMMY_DATASET_DIR, "imagenet_with_subset_dirs") + dataset = Dataset.import_from(self.DUMMY_DATASET_DIR, self.FORMAT_NAME) for subset_name, subset in dataset.subsets().items(): expected_dataset = self._create_expected_dataset().transform(