From b7b7ffb2e7f9a8bb47e685a5cf813d0c3bcd6495 Mon Sep 17 00:00:00 2001 From: sooahleex Date: Sat, 22 Jun 2024 16:17:34 +0900 Subject: [PATCH] Revert "Add hierarchical ImageNet-like dataset format (#1528)" This reverts commit c296000be8ec19ffad95d370048a708da370659c. --- .../cli/commands/downloaders/kaggle.py | 2 +- src/datumaro/components/importer.py | 4 +- .../plugins/data_formats/image_dir.py | 28 +--- src/datumaro/plugins/data_formats/imagenet.py | 152 +++++++----------- .../plugins/data_formats/yolo/exporter.py | 6 +- src/datumaro/plugins/specs.json | 10 +- src/datumaro/util/definitions.py | 1 - .../{ => label_0}/label_0_2.jpg | Bin .../label_1/label_1_1/label_1_1.jpg | Bin 631 -> 0 bytes .../test/{ => label_0}/label_0_2.jpg | Bin .../test/label_1/label_1_1/label_1_1.jpg | Bin 631 -> 0 bytes .../train/{ => label_0}/label_0_2.jpg | Bin .../train/label_1/label_1_1/label_1_1.jpg | Bin 631 -> 0 bytes .../val/{ => label_0}/label_0_2.jpg | Bin .../val/label_1/label_1_1/label_1_1.jpg | Bin 631 -> 0 bytes tests/unit/test_image_dir_format.py | 23 ++- tests/unit/test_imagenet_format.py | 13 +- 17 files changed, 106 insertions(+), 133 deletions(-) rename tests/assets/imagenet_dataset/{ => label_0}/label_0_2.jpg (100%) delete mode 100644 tests/assets/imagenet_dataset/label_1/label_1_1/label_1_1.jpg rename tests/assets/imagenet_subsets_dataset/test/{ => label_0}/label_0_2.jpg (100%) delete mode 100644 tests/assets/imagenet_subsets_dataset/test/label_1/label_1_1/label_1_1.jpg rename tests/assets/imagenet_subsets_dataset/train/{ => label_0}/label_0_2.jpg (100%) delete mode 100644 tests/assets/imagenet_subsets_dataset/train/label_1/label_1_1/label_1_1.jpg rename tests/assets/imagenet_subsets_dataset/val/{ => label_0}/label_0_2.jpg (100%) delete mode 100644 tests/assets/imagenet_subsets_dataset/val/label_1/label_1_1/label_1_1.jpg diff --git a/src/datumaro/cli/commands/downloaders/kaggle.py b/src/datumaro/cli/commands/downloaders/kaggle.py index b7127a2d50..917ca288d5 100644 --- a/src/datumaro/cli/commands/downloaders/kaggle.py +++ b/src/datumaro/cli/commands/downloaders/kaggle.py @@ -30,7 +30,7 @@ def make_all_paths_absolute(d: Dict, root: str = "."): KAGGLE_API_KEY_EXISTS = bool(os.environ.get("KAGGLE_KEY")) or os.path.exists( - os.path.join(os.path.expanduser("~"), ".kaggle", "kaggle.json") + os.path.join(os.path.expanduser("~"), ".kaggle") ) diff --git a/src/datumaro/components/importer.py b/src/datumaro/components/importer.py index 886e6e1129..128bf47eb3 100644 --- a/src/datumaro/components/importer.py +++ b/src/datumaro/components/importer.py @@ -22,7 +22,7 @@ from datumaro.components.errors import DatasetImportError, DatasetNotFoundError from datumaro.components.format_detection import FormatDetectionConfidence, FormatDetectionContext from datumaro.components.merge.extractor_merger import ExtractorMerger -from datumaro.util.definitions import SUBSET_NAME_WHITELIST +from datumaro.util.definitions import SUBSET_NAME_BLACKLIST T = TypeVar("T") @@ -197,7 +197,7 @@ def _change_context_root_path(context: FormatDetectionContext, path: str): ) for sub_dir in os.listdir(path): - if sub_dir.lower() not in SUBSET_NAME_WHITELIST: + if sub_dir.lower() in SUBSET_NAME_BLACKLIST: continue sub_path = osp.join(path, sub_dir) diff --git a/src/datumaro/plugins/data_formats/image_dir.py b/src/datumaro/plugins/data_formats/image_dir.py index e3a5dc1507..04d1987198 100644 --- a/src/datumaro/plugins/data_formats/image_dir.py +++ b/src/datumaro/plugins/data_formats/image_dir.py @@ -4,12 +4,12 @@ import logging as log import os -from pathlib import Path +import os.path as osp from typing import List, Optional from datumaro.components.dataset_base import DatasetItem, SubsetBase from datumaro.components.exporter import Exporter -from datumaro.components.format_detection import FormatDetectionConfidence, FormatDetectionContext +from datumaro.components.format_detection import FormatDetectionConfidence from datumaro.components.importer import ImportContext, Importer from datumaro.components.media import Image from datumaro.util.image import IMAGE_EXTENSIONS, find_images @@ -31,23 +31,11 @@ def build_cmdline_parser(cls, **kwargs): ) return parser - @classmethod - def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence: - path = Path(context.root_path) - for item in path.iterdir(): - if item.is_dir(): - context.fail("Only flat image directories are supported") - elif item.suffix.lower() not in IMAGE_EXTENSIONS: - context.fail(f"File {item} is not an image.") - return super().detect(context) - @classmethod def find_sources(cls, path): - path = Path(path) - if not path.is_dir(): + if not osp.isdir(path): return [] - - return [{"url": str(path), "format": ImageDirBase.NAME}] + return [{"url": path, "format": ImageDirBase.NAME}] @classmethod def get_file_extensions(cls) -> List[str]: @@ -63,11 +51,11 @@ def __init__( ctx: Optional[ImportContext] = None, ): super().__init__(subset=subset, ctx=ctx) - url = Path(url) - assert url.is_dir(), url - for path in find_images(str(url)): - item_id = Path(path).stem + assert osp.isdir(url), url + + for path in find_images(url, recursive=True): + item_id = osp.relpath(osp.splitext(path)[0], url) self._items.append( DatasetItem(id=item_id, subset=self._subset, media=Image.from_file(path=path)) ) diff --git a/src/datumaro/plugins/data_formats/imagenet.py b/src/datumaro/plugins/data_formats/imagenet.py index 032c8eb9f4..10dea16d0a 100644 --- a/src/datumaro/plugins/data_formats/imagenet.py +++ b/src/datumaro/plugins/data_formats/imagenet.py @@ -5,8 +5,9 @@ import errno import logging as log import os -from pathlib import Path -from typing import List +import os.path as osp +import warnings +from typing import List, Optional from datumaro.components.annotation import AnnotationType, Label, LabelCategories from datumaro.components.dataset_base import DatasetItem, SubsetBase @@ -15,9 +16,8 @@ from datumaro.components.format_detection import FormatDetectionConfidence, FormatDetectionContext from datumaro.components.importer import ImportContext, Importer, with_subset_dirs from datumaro.components.media import Image -from datumaro.util.definitions import SUBSET_NAME_BLACKLIST, SUBSET_NAME_WHITELIST +from datumaro.util.definitions import SUBSET_NAME_BLACKLIST from datumaro.util.image import IMAGE_EXTENSIONS, find_images -from datumaro.util.os_util import walk class ImagenetPath: @@ -30,39 +30,40 @@ def __init__( self, path: str, *, - subset: str | None = None, - ctx: ImportContext | None = None, - min_depth: int | None = None, - max_depth: int | None = None, + subset: Optional[str] = None, + ctx: Optional[ImportContext] = None, ): - if not Path(path).is_dir(): + if not osp.isdir(path): raise NotADirectoryError(errno.ENOTDIR, "Can't find dataset directory", path) + super().__init__(subset=subset, ctx=ctx) - self._max_depth = min_depth - self._min_depth = max_depth + self._categories = self._load_categories(path) self._items = list(self._load_items(path).values()) def _load_categories(self, path): label_cat = LabelCategories() - path = Path(path) - for dirname in sorted(d for d in path.rglob("*") if d.is_dir()): - dirname = dirname.relative_to(path) - if str(dirname) != ImagenetPath.IMAGE_DIR_NO_LABEL: - label_cat.add(str(dirname)) + for dirname in sorted(os.listdir(path)): + if not os.path.isdir(os.path.join(path, dirname)): + warnings.warn( + f"{dirname} is not a directory in the folder {path}, so this will" + "be skipped when declaring the cateogries of `imagenet` dataset." + ) + continue + if dirname != ImagenetPath.IMAGE_DIR_NO_LABEL: + label_cat.add(dirname) return {AnnotationType.label: label_cat} def _load_items(self, path): items = {} - for image_path in find_images( - path, recursive=True, max_depth=self._max_depth, min_depth=self._min_depth - ): - label = str(Path(image_path).parent.relative_to(path)) - if label == ".": # image is located in the root directory - label = ImagenetPath.IMAGE_DIR_NO_LABEL - image_name = Path(image_path).stem - item_id = str(label) + ImagenetPath.SEP_TOKEN + image_name + # Images should be in root/label_dir/*.img and root/*.img is not allowed. + # => max_depth=1, min_depth=1 + for image_path in find_images(path, recursive=True, max_depth=1, min_depth=1): + label = osp.basename(osp.dirname(image_path)) + image_name = osp.splitext(osp.basename(image_path))[0] + + item_id = label + ImagenetPath.SEP_TOKEN + image_name item = items.get(item_id) try: if item is None: @@ -88,70 +89,45 @@ def _load_items(self, path): class ImagenetImporter(Importer): - """ - Multi-level version of ImagenetImporter. - For example, it imports the following directory structure. + """TorchVision's ImageFolder style importer. + For example, it imports the following directory structure. .. code-block:: text root ├── label_0 - │ ├── label_0_1 - │ │ └── img1.jpg - │ └── label_0_2 - │ └── img2.jpg + │ ├── label_0_1.jpg + │ └── label_0_2.jpg └── label_1 - └── img3.jpg + └── label_1_1.jpg """ - _MIN_DEPTH = None - _MAX_DEPTH = None - _FORMAT = ImagenetBase.NAME - DETECT_CONFIDENCE = FormatDetectionConfidence.EXTREME_LOW - @classmethod def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence: # Images must not be under a directory whose name is blacklisted. - for dname, dirnames, filenames in os.walk(context.root_path): - if dname in SUBSET_NAME_WHITELIST: + for dname in os.listdir(context.root_path): + dpath = osp.join(context.root_path, dname) + if osp.isdir(dpath) and dname.lower() in SUBSET_NAME_BLACKLIST: context.fail( - f"Following directory names are not permitted: {SUBSET_NAME_WHITELIST}" + f"{dname} is found in {context.root_path}. " + "However, Images must not be under a directory whose name is blacklisted " + f"(SUBSET_NAME_BLACKLIST={SUBSET_NAME_BLACKLIST})." ) - rel_dname = Path(dname).relative_to(context.root_path) - level = len(rel_dname.parts) - if cls._MIN_DEPTH is not None and level < cls._MIN_DEPTH and filenames: - context.fail("Found files out of the directory level bounds.") - if cls._MAX_DEPTH is not None and level > cls._MAX_DEPTH and filenames: - context.fail("Found files out of the directory level bounds.") - dpath = Path(context.root_path) / rel_dname - if dpath.is_dir(): - if str(rel_dname).lower() in SUBSET_NAME_BLACKLIST: - context.fail( - f"{dname} is found in {context.root_path}. " - "However, Images must not be under a directory whose name is blacklisted " - f"(SUBSET_NAME_BLACKLIST={SUBSET_NAME_BLACKLIST})." - ) return super().detect(context) - @classmethod - def contains_only_images(cls, path: str | Path): - for _, dirnames, filenames in walk(path, cls._MAX_DEPTH, cls._MIN_DEPTH): - if filenames: - for filename in filenames: - if Path(filename).suffix.lower() not in IMAGE_EXTENSIONS: - return False - elif not dirnames: - return False - return True - @classmethod def find_sources(cls, path): - if not Path(path).is_dir(): + if not osp.isdir(path): return [] - return [{"url": path, "format": cls._FORMAT}] if cls.contains_only_images(path) else [] + # Images should be in root/label_dir/*.img and root/*.img is not allowed. + # => max_depth=1, min_depth=1 + for _ in find_images(path, recursive=True, max_depth=1, min_depth=1): + return [{"url": path, "format": ImagenetBase.NAME}] + + return [] @classmethod def get_file_extensions(cls) -> List[str]: @@ -168,36 +144,32 @@ def build_cmdline_parser(cls, **kwargs): @with_subset_dirs class ImagenetWithSubsetDirsImporter(ImagenetImporter): - """Multi-level image directory structure importer. - Example: + """TorchVision ImageFolder style importer. + For example, it imports the following directory structure. .. code-block:: root ├── train │ ├── label_0 - │ │ ├── label_0_1 - │ │ │ └── img1.jpg - │ │ └── label_0_2 - │ │ └── img2.jpg + │ │ ├── label_0_1.jpg + │ │ └── label_0_2.jpg │ └── label_1 - │ └── img3.jpg + │ └── label_1_1.jpg ├── val │ ├── label_0 - │ │ ├── label_0_1 - │ │ │ └── img1.jpg - │ │ └── label_0_2 - │ │ └── img2.jpg + │ │ ├── label_0_1.jpg + │ │ └── label_0_2.jpg │ └── label_1 - │ └── img3.jpg + │ └── label_1_1.jpg └── test - │ ├── label_0 - │ ├── label_0_1 - │ │ └── img1.jpg - │ └── label_0_2 - │ └── img2.jpg + ├── label_0 + │ ├── label_0_1.jpg + │ └── label_0_2.jpg └── label_1 - └── img3.jpg + └── label_1_1.jpg + + Then, it will have three subsets: train, val, and test and they have label_0 and label_1 labels. """ @@ -227,7 +199,7 @@ def _get_name(item: DatasetItem) -> str: 'For example, dataset.export("", format="imagenet_with_subset_dirs").' ) - root_dir = Path(self._save_dir) + root_dir = self._save_dir extractor = self._extractor labels = {} for item in self._extractor: @@ -238,18 +210,18 @@ def _get_name(item: DatasetItem) -> str: label_name = extractor.categories()[AnnotationType.label][label].name self._save_image( item, - subdir=root_dir / item.subset / label_name + subdir=osp.join(root_dir, item.subset, label_name) if self.USE_SUBSET_DIRS - else root_dir / label_name, + else osp.join(root_dir, label_name), name=file_name, ) if not labels: self._save_image( item, - subdir=root_dir / item.subset / ImagenetPath.IMAGE_DIR_NO_LABEL + subdir=osp.join(root_dir, item.subset, ImagenetPath.IMAGE_DIR_NO_LABEL) if self.USE_SUBSET_DIRS - else root_dir / ImagenetPath.IMAGE_DIR_NO_LABEL, + else osp.join(root_dir, ImagenetPath.IMAGE_DIR_NO_LABEL), name=file_name, ) diff --git a/src/datumaro/plugins/data_formats/yolo/exporter.py b/src/datumaro/plugins/data_formats/yolo/exporter.py index e74989df62..3cfbeb3994 100644 --- a/src/datumaro/plugins/data_formats/yolo/exporter.py +++ b/src/datumaro/plugins/data_formats/yolo/exporter.py @@ -15,7 +15,6 @@ from datumaro.components.exporter import Exporter from datumaro.components.media import Image from datumaro.util import str_to_bool -from datumaro.util.definitions import SUBSET_NAME_WHITELIST from .format import YoloPath @@ -196,6 +195,7 @@ def can_stream(self) -> bool: class YoloUltralyticsExporter(YoloExporter): + allowed_subset_names = {"train", "val", "test"} must_subset_names = {"train", "val"} def __init__(self, extractor: IDataset, save_dir: str, **kwargs) -> None: @@ -214,9 +214,9 @@ def _check_dataset(self): subset_names = set(self._extractor.subsets().keys()) for subset in subset_names: - if subset not in SUBSET_NAME_WHITELIST: + if subset not in self.allowed_subset_names: raise DatasetExportError( - f"The allowed subset name should be in {SUBSET_NAME_WHITELIST}, " + f"The allowed subset name is in {self.allowed_subset_names}, " f'so that subset "{subset}" is not allowed.' ) diff --git a/src/datumaro/plugins/specs.json b/src/datumaro/plugins/specs.json index 8891b79287..c8afe2e4bc 100644 --- a/src/datumaro/plugins/specs.json +++ b/src/datumaro/plugins/specs.json @@ -1834,11 +1834,6 @@ "plugin_name": "anns_to_labels", "plugin_type": "Transform" }, - { - "import_path": "datumaro.plugins.transforms.AstypeAnnotations", - "plugin_name": "astype_annotations", - "plugin_type": "Transform" - }, { "import_path": "datumaro.plugins.transforms.BboxValuesDecrement", "plugin_name": "bbox_values_decrement", @@ -1959,6 +1954,11 @@ "plugin_name": "sort", "plugin_type": "Transform" }, + { + "import_path": "datumaro.plugins.transforms.AstypeAnnotations", + "plugin_name": "astype_annotations", + "plugin_type": "Transform" + }, { "import_path": "datumaro.plugins.validators.ClassificationValidator", "plugin_name": "classification", diff --git a/src/datumaro/util/definitions.py b/src/datumaro/util/definitions.py index 9882ead8f5..c16db86807 100644 --- a/src/datumaro/util/definitions.py +++ b/src/datumaro/util/definitions.py @@ -10,7 +10,6 @@ DEFAULT_SUBSET_NAME = "default" BboxIntCoords = Tuple[int, int, int, int] # (x, y, w, h) SUBSET_NAME_BLACKLIST = {"labels", "images", "annotations", "instances"} -SUBSET_NAME_WHITELIST = {"train", "test", "val"} def get_datumaro_cache_dir( diff --git a/tests/assets/imagenet_dataset/label_0_2.jpg b/tests/assets/imagenet_dataset/label_0/label_0_2.jpg similarity index 100% rename from tests/assets/imagenet_dataset/label_0_2.jpg rename to tests/assets/imagenet_dataset/label_0/label_0_2.jpg diff --git a/tests/assets/imagenet_dataset/label_1/label_1_1/label_1_1.jpg b/tests/assets/imagenet_dataset/label_1/label_1_1/label_1_1.jpg deleted file mode 100644 index 9d28e0c15e09d6a84d9adf911075171c481c09ac..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 631 zcmex=^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<gTWM0TY@5u?V53ptdXHXalWy7)oGIH{I3zSIJR&kGIVCkMJtH%#xTLhKyrQzI zxuvzOy`!^h(&Q;qr%j(RbJn88OO`HMzGCI7O`ErD-L`$l&RvHNA31vL_=%IJE?vHI z_1g6tH*YuS~;l_iU%Emz-M3agxa*3&!JXHM%@*3D@ k#CfcVET6$WhVa)d1|DWcVB|3iGT1YG;L=#sVE_Ln0Q-o|ng9R* diff --git a/tests/assets/imagenet_subsets_dataset/test/label_0_2.jpg b/tests/assets/imagenet_subsets_dataset/test/label_0/label_0_2.jpg similarity index 100% rename from tests/assets/imagenet_subsets_dataset/test/label_0_2.jpg rename to tests/assets/imagenet_subsets_dataset/test/label_0/label_0_2.jpg diff --git a/tests/assets/imagenet_subsets_dataset/test/label_1/label_1_1/label_1_1.jpg b/tests/assets/imagenet_subsets_dataset/test/label_1/label_1_1/label_1_1.jpg deleted file mode 100644 index 9d28e0c15e09d6a84d9adf911075171c481c09ac..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 631 zcmex=^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<gTWM0TY@5u?V53ptdXHXalWy7)oGIH{I3zSIJR&kGIVCkMJtH%#xTLhKyrQzI zxuvzOy`!^h(&Q;qr%j(RbJn88OO`HMzGCI7O`ErD-L`$l&RvHNA31vL_=%IJE?vHI z_1g6tH*YuS~;l_iU%Emz-M3agxa*3&!JXHM%@*3D@ k#CfcVET6$WhVa)d1|DWcVB|3iGT1YG;L=#sVE_Ln0Q-o|ng9R* diff --git a/tests/assets/imagenet_subsets_dataset/train/label_0_2.jpg b/tests/assets/imagenet_subsets_dataset/train/label_0/label_0_2.jpg similarity index 100% rename from tests/assets/imagenet_subsets_dataset/train/label_0_2.jpg rename to tests/assets/imagenet_subsets_dataset/train/label_0/label_0_2.jpg diff --git a/tests/assets/imagenet_subsets_dataset/train/label_1/label_1_1/label_1_1.jpg b/tests/assets/imagenet_subsets_dataset/train/label_1/label_1_1/label_1_1.jpg deleted file mode 100644 index 9d28e0c15e09d6a84d9adf911075171c481c09ac..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 631 zcmex=^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<gTWM0TY@5u?V53ptdXHXalWy7)oGIH{I3zSIJR&kGIVCkMJtH%#xTLhKyrQzI zxuvzOy`!^h(&Q;qr%j(RbJn88OO`HMzGCI7O`ErD-L`$l&RvHNA31vL_=%IJE?vHI z_1g6tH*YuS~;l_iU%Emz-M3agxa*3&!JXHM%@*3D@ k#CfcVET6$WhVa)d1|DWcVB|3iGT1YG;L=#sVE_Ln0Q-o|ng9R* diff --git a/tests/assets/imagenet_subsets_dataset/val/label_0_2.jpg b/tests/assets/imagenet_subsets_dataset/val/label_0/label_0_2.jpg similarity index 100% rename from tests/assets/imagenet_subsets_dataset/val/label_0_2.jpg rename to tests/assets/imagenet_subsets_dataset/val/label_0/label_0_2.jpg diff --git a/tests/assets/imagenet_subsets_dataset/val/label_1/label_1_1/label_1_1.jpg b/tests/assets/imagenet_subsets_dataset/val/label_1/label_1_1/label_1_1.jpg deleted file mode 100644 index 9d28e0c15e09d6a84d9adf911075171c481c09ac..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 631 zcmex=^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<gTWM0TY@5u?V53ptdXHXalWy7)oGIH{I3zSIJR&kGIVCkMJtH%#xTLhKyrQzI zxuvzOy`!^h(&Q;qr%j(RbJn88OO`HMzGCI7O`ErD-L`$l&RvHNA31vL_=%IJE?vHI z_1g6tH*YuS~;l_iU%Emz-M3agxa*3&!JXHM%@*3D@ k#CfcVET6$WhVa)d1|DWcVB|3iGT1YG;L=#sVE_Ln0Q-o|ng9R* diff --git a/tests/unit/test_image_dir_format.py b/tests/unit/test_image_dir_format.py index 7389ebab36..ddbd0e5414 100644 --- a/tests/unit/test_image_dir_format.py +++ b/tests/unit/test_image_dir_format.py @@ -32,6 +32,21 @@ def test_can_load(self): require_media=True, ) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_relative_paths(self): + dataset = Dataset.from_iterable( + [ + DatasetItem(id="1", media=Image.from_numpy(data=np.ones((4, 2, 3)))), + DatasetItem(id="subdir1/1", media=Image.from_numpy(data=np.ones((2, 6, 3)))), + DatasetItem(id="subdir2/1", media=Image.from_numpy(data=np.ones((5, 4, 3)))), + ] + ) + + with TestDir() as test_dir: + check_save_and_load( + self, dataset, ImageDirExporter.convert, test_dir, importer="image_dir" + ) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self): dataset = Dataset.from_iterable( @@ -51,8 +66,12 @@ def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self): def test_can_save_and_load_image_with_arbitrary_extension(self): dataset = Dataset.from_iterable( [ - DatasetItem(id="1", media=Image.from_numpy(data=np.zeros((4, 3, 3)), ext=".JPEG")), - DatasetItem(id="2", media=Image.from_numpy(data=np.zeros((3, 4, 3)), ext=".bmp")), + DatasetItem( + id="q/1", media=Image.from_numpy(data=np.zeros((4, 3, 3)), ext=".JPEG") + ), + DatasetItem( + id="a/b/c/2", media=Image.from_numpy(data=np.zeros((3, 4, 3)), ext=".bmp") + ), ] ) diff --git a/tests/unit/test_imagenet_format.py b/tests/unit/test_imagenet_format.py index e650a5c7d2..dd4e6d009e 100644 --- a/tests/unit/test_imagenet_format.py +++ b/tests/unit/test_imagenet_format.py @@ -1,6 +1,5 @@ import pickle # nosec B403 from copy import deepcopy -from pathlib import Path from unittest import TestCase import numpy as np @@ -190,13 +189,9 @@ def _create_expected_dataset(self): annotations=[Label(0)], ), DatasetItem( - id="no_label:label_0_2", + id="label_0:label_0_2", media=Image.from_numpy(data=np.ones((10, 10, 3))), - ), - DatasetItem( - id=f"{Path('label_1', 'label_1_1')}:label_1_1", - media=Image.from_numpy(data=np.ones((8, 8, 3))), - annotations=[Label(2)], + annotations=[Label(0)], ), DatasetItem( id="label_1:label_1_1", @@ -206,7 +201,7 @@ def _create_expected_dataset(self): ], categories={ AnnotationType.label: LabelCategories.from_iterable( - ("label_0", "label_1", f"{Path('label_1', 'label_1_1')}") + "label_" + str(label) for label in range(2) ), }, ) @@ -239,7 +234,7 @@ class ImagenetWithSubsetDirsImporterTest(ImagenetImporterTest): @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_import(self): - dataset = Dataset.import_from(self.DUMMY_DATASET_DIR, self.FORMAT_NAME) + dataset = Dataset.import_from(self.DUMMY_DATASET_DIR, "imagenet_with_subset_dirs") for subset_name, subset in dataset.subsets().items(): expected_dataset = self._create_expected_dataset().transform(