From a1fcfc8c0a2044a27a978ccd76a9fbc443613c84 Mon Sep 17 00:00:00 2001 From: brimoor Date: Wed, 6 Mar 2024 23:39:38 -0500 Subject: [PATCH] linting --- .../user_guide/dataset_zoo/datasets.rst | 114 ++++++++--------- fiftyone/utils/places.py | 60 +++++---- fiftyone/zoo/datasets/base.py | 118 ++++++++++-------- 3 files changed, 149 insertions(+), 143 deletions(-) diff --git a/docs/source/user_guide/dataset_zoo/datasets.rst b/docs/source/user_guide/dataset_zoo/datasets.rst index 8ef4709a7a..29cc2a10f9 100644 --- a/docs/source/user_guide/dataset_zoo/datasets.rst +++ b/docs/source/user_guide/dataset_zoo/datasets.rst @@ -68,6 +68,8 @@ This page lists all of the datasets available in the Dataset Zoo. +--------------------------------------------------------------------+---------------------------------------------------------------------------+ | :ref:`Open Images V7 ` | image, classification, detection, segmentation, keypoints, relationships | +--------------------------------------------------------------------+---------------------------------------------------------------------------+ + | :ref:`Places ` | image, classification | + +--------------------------------------------------------------------+---------------------------------------------------------------------------+ | :ref:`Quickstart ` | image, quickstart | +--------------------------------------------------------------------+---------------------------------------------------------------------------+ | :ref:`Quickstart Geo ` | image, location, quickstart | @@ -84,8 +86,6 @@ This page lists all of the datasets available in the Dataset Zoo. 
+--------------------------------------------------------------------+---------------------------------------------------------------------------+ | :ref:`VOC-2012 ` | image, detection | +--------------------------------------------------------------------+---------------------------------------------------------------------------+ - | :ref:`Places ` | image, classification | - +--------------------------------------------------------------------+---------------------------------------------------------------------------+ .. _dataset-zoo-activitynet-100: @@ -3199,6 +3199,61 @@ Images V7 by passing them to :alt: open-images-v7 :align: center +.. _dataset-zoo-places: + +Places +------ + +Places is a scene recognition dataset of 10 million images comprising ~400 +unique scene categories. + +The images are labeled with scene semantic categories, comprising a large +and diverse list of the types of environments encountered in the world. + +**Details** + +- Dataset name: ``places`` +- Dataset source: http://places2.csail.mit.edu/download-private.html +- Dataset size: 29 GB +- Tags: ``image, classification`` +- Supported splits: ``train, validation, test`` +- ZooDataset classes: + :class:`PlacesDataset ` + +**Full split stats** + +- Train split: 1,803,460 images, with between 3,068 and 5,000 per category +- Test split: 328,500 images, with 900 images per category +- Validation split: 36,500 images, with 100 images per category + +**Example usage** + +.. tabs:: + + .. group-tab:: Python + + .. code-block:: python + :linenos: + + import fiftyone as fo + import fiftyone.zoo as foz + + dataset = foz.load_zoo_dataset("places", split="validation") + + session = fo.launch_app(dataset) + + .. group-tab:: CLI + + .. code-block:: shell + + fiftyone zoo datasets load places --split validation + + fiftyone app launch places-validation + +.. image:: /images/dataset_zoo/places-validation.png + :alt: places-validation + :align: center + .. 
_dataset-zoo-quickstart: Quickstart @@ -3835,58 +3890,3 @@ contain annotations. .. image:: /images/dataset_zoo/voc-2012-validation.png :alt: voc-2012-validation :align: center - -.. _dataset-zoo-places: - -Places ------- - -Places is a scene recognition dataset of 10 million images comprising ~400 -unique scene categories. - -The images are labeled with scene semantic categories, comprising a large -and diverse list of the types of environments encountered in the world. - -**Details** - -- Dataset name: ``places`` -- Dataset source: http://places2.csail.mit.edu/download-private.html -- Dataset size: 29 GB -- Tags: ``image, classification`` -- Supported splits: ``train, validation, test`` -- ZooDataset classes: - :class:`PlacesDataset ` - -**Full split stats** - -- Train split: 1,803,460 images, with between 3,068 and 5,000 per category -- Test split: 328,500 images, with 900 images per category -- Validation split: 36,500 images, with 100 images per category - -**Example usage** - -.. tabs:: - - .. group-tab:: Python - - .. code-block:: python - :linenos: - - import fiftyone as fo - import fiftyone.zoo as foz - - dataset = foz.load_zoo_dataset("places", split="validation") - - session = fo.launch_app(dataset) - - .. group-tab:: CLI - - .. code-block:: shell - - fiftyone zoo datasets load places --split validation - - fiftyone app launch places-validation - -.. image:: /images/dataset_zoo/places-validation.png - :alt: places-validation - :align: center diff --git a/fiftyone/utils/places.py b/fiftyone/utils/places.py index d13f60ac85..86d3083c8e 100644 --- a/fiftyone/utils/places.py +++ b/fiftyone/utils/places.py @@ -1,8 +1,8 @@ """ Utilities for working with the -`Places dataset `. +`Places dataset `_. -| Copyright 2024, Voxel51, Inc. +| Copyright 2017-2024, Voxel51, Inc. 
| `voxel51.com `_ | """ @@ -22,11 +22,7 @@ logger = logging.getLogger(__name__) -def download_places_dataset_split( - dataset_dir, - split, - raw_dir=None, -): +def download_places_dataset_split(dataset_dir, split, raw_dir=None): """Utility that downloads splits of the `Places dataset `. @@ -46,7 +42,6 @@ def download_places_dataset_split( - did_download: whether any content was downloaded (True) or if all necessary files were already downloaded (False) """ - if split not in _IMAGE_DOWNLOAD_LINKS: raise ValueError( "Unsupported split '%s'; supported values are %s" @@ -194,27 +189,25 @@ def download_places_dataset_split( class PlacesDatasetImporter(foud.LabeledImageDatasetImporter): - """Base class for importing datasets in Places format. - - See :class:`fiftyone.types.PlacesDataset` for format details. + """Class for importing datasets written by + :meth:`download_places_dataset_split`. Args: dataset_dir: the dataset directory shuffle (False): whether to randomly shuffle the order in which the samples are imported seed (None): a random seed to use when shuffling - max_samples (None): a maximum number of samples to load. - By default, all matching samples are loaded. + max_samples (None): a maximum number of samples to load. 
By default, + all samples are imported """ def __init__( self, - dataset_dir=None, + dataset_dir, shuffle=False, seed=None, max_samples=None, ): - super().__init__( dataset_dir=dataset_dir, shuffle=shuffle, @@ -222,8 +215,8 @@ def __init__( max_samples=max_samples, ) - self._labels_map = None self._images_map = None + self._labels_map = None self._uuids = None self._iter_uuids = None @@ -243,16 +236,17 @@ def __iter__(self): self._iter_uuids = iter(self._uuids) return self + def __len__(self): + return len(self._uuids) + def __next__(self): image_id = next(self._iter_uuids) image_path = self._images_map[image_id] + uuid = os.path.basename(image_path) - if self._labels_map: - label = fol.Classification( - label=self._labels_map[os.path.basename(image_path)] - ) - else: - label = None + label = self._labels_map.get(uuid, None) + if label is not None: + label = fol.Classification(label=label) return image_path, None, label @@ -269,22 +263,24 @@ def setup(self): os.path.splitext(filename)[0]: os.path.join(data_dir, filename) for filename in etau.list_files(data_dir) } - available_ids = list(images_map.keys()) + + labels_path = os.path.join(labels_dir, "labels.json") + if os.path.isfile(labels_path): + labels_map = etas.load_json(labels_path) + else: + labels_map = {} + + uuids = list(images_map.keys()) if self.shuffle: - random.shuffle(available_ids) + random.shuffle(uuids) if self.max_samples is not None: - if not self.shuffle: - random.shuffle(available_ids) - available_ids = available_ids[: self.max_samples] + uuids = uuids[: self.max_samples] - self._uuids = available_ids self._images_map = images_map - if os.path.exists(labels_dir): - self._labels_map = etas.load_json( - os.path.join(labels_dir, "labels.json") - ) + self._labels_map = labels_map + self._uuids = uuids @staticmethod def _get_num_samples(dataset_dir): diff --git a/fiftyone/zoo/datasets/base.py b/fiftyone/zoo/datasets/base.py index 83e0355517..7aa2a55faa 100644 --- a/fiftyone/zoo/datasets/base.py +++ 
b/fiftyone/zoo/datasets/base.py @@ -2894,6 +2894,69 @@ def _download_and_prepare(self, dataset_dir, _, split): return dataset_type, num_samples, classes +class PlacesDataset(FiftyOneDataset): + """Places is a scene recognition dataset of 10 million images comprising + ~400 unique scene categories. + + The images are labeled with scene semantic categories, comprising a large + and diverse list of the types of environments encountered in the world. + + Full split stats: + + - Train split: 1,803,460 images, with between 3,068 and 5,000 per category + - Test split: 328,500 images, with 900 images per category + - Validation split: 36,500 images, with 100 images per category + + Example usage:: + + import fiftyone as fo + import fiftyone.zoo as foz + + dataset = foz.load_zoo_dataset("places", split="validation") + + session = fo.launch_app(dataset) + + Dataset size + 29 GB + + Source + http://places2.csail.mit.edu/download-private.html + """ + + @property + def name(self): + return "places" + + @property + def tags(self): + return ("image", "classification") + + @property + def supported_splits(self): + return ("train", "test", "validation") + + @property + def supports_partial_downloads(self): + return False + + def _download_and_prepare(self, dataset_dir, _, split): + num_samples, classes, _ = foup.download_places_dataset_split( + dataset_dir, + split, + raw_dir=self._get_raw_dir(dataset_dir), + ) + + dataset_type = fot.PlacesDataset() + + return dataset_type, num_samples, classes + + def _get_raw_dir(self, dataset_dir): + # A split-independent location to store full annotation files so that + # they never need to be redownloaded + root_dir = os.path.dirname(os.path.normpath(dataset_dir)) + return os.path.join(root_dir, "raw") + + class QuickstartDataset(FiftyOneDataset): """A small dataset with ground truth bounding boxes and predictions. 
@@ -3218,59 +3281,6 @@ def _download_and_prepare(self, dataset_dir, scratch_dir, split): return dataset_type, num_samples, classes -class PlacesDataset(FiftyOneDataset): - def __init__( - self, - classes=None, - num_workers=None, - shuffle=None, - seed=None, - max_samples=None, - ): - self.classes = classes - self.num_workers = num_workers - self.shuffle = shuffle - self.seed = seed - self.max_samples = max_samples - - @property - def name(self): - return "places" - - @property - def tags(self): - return ( - "image", - "classification", - ) - - @property - def supported_splits(self): - return ("train", "test", "validation") - - @property - def supports_partial_downloads(self): - return False - - def _download_and_prepare(self, dataset_dir, _, split): - num_samples, classes, downloaded = foup.download_places_dataset_split( - dataset_dir, - split, - raw_dir=self._get_raw_dir(dataset_dir), - ) - - dataset_type = fot.PlacesDataset() - - if not downloaded: - num_samples = None - - return dataset_type, num_samples, classes - - def _get_raw_dir(self, dataset_dir): - root_dir = os.path.dirname(os.path.normpath(dataset_dir)) - return os.path.join(root_dir, "raw") - - AVAILABLE_DATASETS = { "activitynet-100": ActivityNet100Dataset, "activitynet-200": ActivityNet200Dataset, @@ -3292,13 +3302,13 @@ def _get_raw_dir(self, dataset_dir): "lfw": LabeledFacesInTheWildDataset, "open-images-v6": OpenImagesV6Dataset, "open-images-v7": OpenImagesV7Dataset, + "places": PlacesDataset, "quickstart": QuickstartDataset, "quickstart-geo": QuickstartGeoDataset, "quickstart-video": QuickstartVideoDataset, "quickstart-groups": QuickstartGroupsDataset, "sama-coco": SamaCOCODataset, "ucf101": UCF101Dataset, - "places": PlacesDataset, }