From a1fcfc8c0a2044a27a978ccd76a9fbc443613c84 Mon Sep 17 00:00:00 2001
From: brimoor <brimoor@umich.edu>
Date: Wed, 6 Mar 2024 23:39:38 -0500
Subject: [PATCH] linting

---
 .../user_guide/dataset_zoo/datasets.rst       | 114 ++++++++---------
 fiftyone/utils/places.py                      |  60 +++++----
 fiftyone/zoo/datasets/base.py                 | 118 ++++++++++--------
 3 files changed, 149 insertions(+), 143 deletions(-)

diff --git a/docs/source/user_guide/dataset_zoo/datasets.rst b/docs/source/user_guide/dataset_zoo/datasets.rst
index 8ef4709a7a..29cc2a10f9 100644
--- a/docs/source/user_guide/dataset_zoo/datasets.rst
+++ b/docs/source/user_guide/dataset_zoo/datasets.rst
@@ -68,6 +68,8 @@ This page lists all of the datasets available in the Dataset Zoo.
     +--------------------------------------------------------------------+---------------------------------------------------------------------------+
     | :ref:`Open Images V7 <dataset-zoo-open-images-v7>`                 | image, classification, detection, segmentation, keypoints, relationships  |
     +--------------------------------------------------------------------+---------------------------------------------------------------------------+
+    | :ref:`Places <dataset-zoo-places>`                                 | image, classification                                                     |
+    +--------------------------------------------------------------------+---------------------------------------------------------------------------+
     | :ref:`Quickstart <dataset-zoo-quickstart>`                         | image, quickstart                                                         |
     +--------------------------------------------------------------------+---------------------------------------------------------------------------+
     | :ref:`Quickstart Geo <dataset-zoo-quickstart-geo>`                 | image, location, quickstart                                               |
@@ -84,8 +86,6 @@ This page lists all of the datasets available in the Dataset Zoo.
     +--------------------------------------------------------------------+---------------------------------------------------------------------------+
     | :ref:`VOC-2012 <dataset-zoo-voc-2012>`                             | image, detection                                                          |
     +--------------------------------------------------------------------+---------------------------------------------------------------------------+
-    | :ref:`Places <dataset-zoo-places>`                                 | image, classification                                                          |
-    +--------------------------------------------------------------------+---------------------------------------------------------------------------+
 
 .. _dataset-zoo-activitynet-100:
 
@@ -3199,6 +3199,61 @@ Images V7 by passing them to
    :alt: open-images-v7
    :align: center
 
+.. _dataset-zoo-places:
+
+Places
+------
+
+Places is a scene recognition dataset of 10 million images comprising ~400
+unique scene categories.
+
+The images are labeled with scene semantic categories, comprising a large
+and diverse list of the types of environments encountered in the world.
+
+**Details**
+
+-   Dataset name: ``places``
+-   Dataset source: http://places2.csail.mit.edu/download-private.html
+-   Dataset size: 29 GB
+-   Tags: ``image, classification``
+-   Supported splits: ``train, validation, test``
+-   ZooDataset classes:
+    :class:`PlacesDataset <fiftyone.zoo.datasets.base.PlacesDataset>`
+
+**Full split stats**
+
+-   Train split: 1,803,460 images, with 3,068 to 5,000 images per category
+-   Test split: 328,500 images, with 900 images per category
+-   Validation split: 36,500 images, with 100 images per category
+
+**Example usage**
+
+.. tabs::
+
+  .. group-tab:: Python
+
+    .. code-block:: python
+        :linenos:
+
+        import fiftyone as fo
+        import fiftyone.zoo as foz
+
+        dataset = foz.load_zoo_dataset("places", split="validation")
+
+        session = fo.launch_app(dataset)
+
+  .. group-tab:: CLI
+
+    .. code-block:: shell
+
+        fiftyone zoo datasets load places --split validation
+
+        fiftyone app launch places-validation
+
+.. image:: /images/dataset_zoo/places-validation.png
+   :alt: places-validation
+   :align: center
+
 .. _dataset-zoo-quickstart:
 
 Quickstart
@@ -3835,58 +3890,3 @@ contain annotations.
 .. image:: /images/dataset_zoo/voc-2012-validation.png
    :alt: voc-2012-validation
    :align: center
-
-.. _dataset-zoo-places:
-
-Places
-------
-
-Places is a scene recognition dataset of 10 million images comprising ~400
-unique scene categories.
-
-The images are labeled with scene semantic categories, comprising a large
-and diverse list of the types of environments encountered in the world.
-
-**Details**
-
--   Dataset name: ``places``
--   Dataset source: http://places2.csail.mit.edu/download-private.html
--   Dataset size: 29 GB
--   Tags: ``image, classification``
--   Supported splits: ``train, validation, test``
--   ZooDataset classes:
-    :class:`PlacesDataset <fiftyone.zoo.datasets.base.PlacesDataset>`
-
-**Full split stats**
-
--   Train split: 1,803,460 images, with between 3,068 and 5,000 per category
--   Test split: 328,500 images, with 900 images per category
--   Validation split: 36,500 images, with 100 images per category
-
-**Example usage**
-
-.. tabs::
-
-  .. group-tab:: Python
-
-    .. code-block:: python
-        :linenos:
-
-        import fiftyone as fo
-        import fiftyone.zoo as foz
-
-        dataset = foz.load_zoo_dataset("places", split="validation")
-
-        session = fo.launch_app(dataset)
-
-  .. group-tab:: CLI
-
-    .. code-block:: shell
-
-        fiftyone zoo datasets load places --split validation
-
-        fiftyone app launch places-validation
-
-.. image:: /images/dataset_zoo/places-validation.png
-   :alt: places-validation
-   :align: center
diff --git a/fiftyone/utils/places.py b/fiftyone/utils/places.py
index d13f60ac85..86d3083c8e 100644
--- a/fiftyone/utils/places.py
+++ b/fiftyone/utils/places.py
@@ -1,8 +1,8 @@
 """
 Utilities for working with the
-`Places dataset <http://places2.csail.mit.edu/index.html>`.
+`Places dataset <http://places2.csail.mit.edu/index.html>`_.
 
-| Copyright 2024, Voxel51, Inc.
+| Copyright 2017-2024, Voxel51, Inc.
 | `voxel51.com <https://voxel51.com/>`_
 |
 """
@@ -22,11 +22,7 @@
 logger = logging.getLogger(__name__)
 
 
-def download_places_dataset_split(
-    dataset_dir,
-    split,
-    raw_dir=None,
-):
+def download_places_dataset_split(dataset_dir, split, raw_dir=None):
     """Utility that downloads splits of the
     `Places dataset <http://places2.csail.mit.edu/index.html>`.
 
@@ -46,7 +42,6 @@ def download_places_dataset_split(
         -   did_download: whether any content was downloaded (True) or if all
             necessary files were already downloaded (False)
     """
-
     if split not in _IMAGE_DOWNLOAD_LINKS:
         raise ValueError(
             "Unsupported split '%s'; supported values are %s"
@@ -194,27 +189,25 @@ def download_places_dataset_split(
 
 
 class PlacesDatasetImporter(foud.LabeledImageDatasetImporter):
-    """Base class for importing datasets in Places format.
-
-    See :class:`fiftyone.types.PlacesDataset` for format details.
+    """Class for importing datasets written by
+    :meth:`download_places_dataset_split`.
 
     Args:
         dataset_dir: the dataset directory
         shuffle (False): whether to randomly shuffle the order in which the
             samples are imported
         seed (None): a random seed to use when shuffling
-        max_samples (None): a maximum number of samples to load.
-            By default, all matching samples are loaded.
+        max_samples (None): a maximum number of samples to load. By default,
+            all samples are imported
     """
 
     def __init__(
         self,
-        dataset_dir=None,
+        dataset_dir,
         shuffle=False,
         seed=None,
         max_samples=None,
     ):
-
         super().__init__(
             dataset_dir=dataset_dir,
             shuffle=shuffle,
@@ -222,8 +215,8 @@ def __init__(
             max_samples=max_samples,
         )
 
-        self._labels_map = None
         self._images_map = None
+        self._labels_map = None
         self._uuids = None
         self._iter_uuids = None
 
@@ -243,16 +236,17 @@ def __iter__(self):
         self._iter_uuids = iter(self._uuids)
         return self
 
+    def __len__(self):
+        return len(self._uuids)
+
     def __next__(self):
         image_id = next(self._iter_uuids)
         image_path = self._images_map[image_id]
+        uuid = os.path.basename(image_path)
 
-        if self._labels_map:
-            label = fol.Classification(
-                label=self._labels_map[os.path.basename(image_path)]
-            )
-        else:
-            label = None
+        label = self._labels_map.get(uuid, None)
+        if label is not None:
+            label = fol.Classification(label=label)
 
         return image_path, None, label
 
@@ -269,22 +263,24 @@ def setup(self):
             os.path.splitext(filename)[0]: os.path.join(data_dir, filename)
             for filename in etau.list_files(data_dir)
         }
-        available_ids = list(images_map.keys())
+
+        labels_path = os.path.join(labels_dir, "labels.json")
+        if os.path.isfile(labels_path):
+            labels_map = etas.load_json(labels_path)
+        else:
+            labels_map = {}
+
+        uuids = list(images_map.keys())
 
         if self.shuffle:
-            random.shuffle(available_ids)
+            random.shuffle(uuids)
 
         if self.max_samples is not None:
-            if not self.shuffle:
-                random.shuffle(available_ids)
-            available_ids = available_ids[: self.max_samples]
+            uuids = uuids[: self.max_samples]
 
-        self._uuids = available_ids
         self._images_map = images_map
-        if os.path.exists(labels_dir):
-            self._labels_map = etas.load_json(
-                os.path.join(labels_dir, "labels.json")
-            )
+        self._labels_map = labels_map
+        self._uuids = uuids
 
     @staticmethod
     def _get_num_samples(dataset_dir):
diff --git a/fiftyone/zoo/datasets/base.py b/fiftyone/zoo/datasets/base.py
index 83e0355517..7aa2a55faa 100644
--- a/fiftyone/zoo/datasets/base.py
+++ b/fiftyone/zoo/datasets/base.py
@@ -2894,6 +2894,69 @@ def _download_and_prepare(self, dataset_dir, _, split):
         return dataset_type, num_samples, classes
 
 
+class PlacesDataset(FiftyOneDataset):
+    """Places is a scene recognition dataset of 10 million images comprising
+    ~400 unique scene categories.
+
+    The images are labeled with scene semantic categories, comprising a large
+    and diverse list of the types of environments encountered in the world.
+
+    Full split stats:
+
+    -   Train split: 1,803,460 images, with 3,068 to 5,000 images per category
+    -   Test split: 328,500 images, with 900 images per category
+    -   Validation split: 36,500 images, with 100 images per category
+
+    Example usage::
+
+        import fiftyone as fo
+        import fiftyone.zoo as foz
+
+        dataset = foz.load_zoo_dataset("places", split="validation")
+
+        session = fo.launch_app(dataset)
+
+    Dataset size
+        29 GB
+
+    Source
+        http://places2.csail.mit.edu/download-private.html
+    """
+
+    @property
+    def name(self):
+        return "places"
+
+    @property
+    def tags(self):
+        return ("image", "classification")
+
+    @property
+    def supported_splits(self):
+        return ("train", "test", "validation")
+
+    @property
+    def supports_partial_downloads(self):
+        return False
+
+    def _download_and_prepare(self, dataset_dir, _, split):
+        num_samples, classes, _ = foup.download_places_dataset_split(
+            dataset_dir,
+            split,
+            raw_dir=self._get_raw_dir(dataset_dir),
+        )
+
+        dataset_type = fot.PlacesDataset()
+
+        return dataset_type, num_samples, classes
+
+    def _get_raw_dir(self, dataset_dir):
+        # A split-independent location to store full annotation files so that
+        # they never need to be redownloaded
+        root_dir = os.path.dirname(os.path.normpath(dataset_dir))
+        return os.path.join(root_dir, "raw")
+
+
 class QuickstartDataset(FiftyOneDataset):
     """A small dataset with ground truth bounding boxes and predictions.
 
@@ -3218,59 +3281,6 @@ def _download_and_prepare(self, dataset_dir, scratch_dir, split):
         return dataset_type, num_samples, classes
 
 
-class PlacesDataset(FiftyOneDataset):
-    def __init__(
-        self,
-        classes=None,
-        num_workers=None,
-        shuffle=None,
-        seed=None,
-        max_samples=None,
-    ):
-        self.classes = classes
-        self.num_workers = num_workers
-        self.shuffle = shuffle
-        self.seed = seed
-        self.max_samples = max_samples
-
-    @property
-    def name(self):
-        return "places"
-
-    @property
-    def tags(self):
-        return (
-            "image",
-            "classification",
-        )
-
-    @property
-    def supported_splits(self):
-        return ("train", "test", "validation")
-
-    @property
-    def supports_partial_downloads(self):
-        return False
-
-    def _download_and_prepare(self, dataset_dir, _, split):
-        num_samples, classes, downloaded = foup.download_places_dataset_split(
-            dataset_dir,
-            split,
-            raw_dir=self._get_raw_dir(dataset_dir),
-        )
-
-        dataset_type = fot.PlacesDataset()
-
-        if not downloaded:
-            num_samples = None
-
-        return dataset_type, num_samples, classes
-
-    def _get_raw_dir(self, dataset_dir):
-        root_dir = os.path.dirname(os.path.normpath(dataset_dir))
-        return os.path.join(root_dir, "raw")
-
-
 AVAILABLE_DATASETS = {
     "activitynet-100": ActivityNet100Dataset,
     "activitynet-200": ActivityNet200Dataset,
@@ -3292,13 +3302,13 @@ def _get_raw_dir(self, dataset_dir):
     "lfw": LabeledFacesInTheWildDataset,
     "open-images-v6": OpenImagesV6Dataset,
     "open-images-v7": OpenImagesV7Dataset,
+    "places": PlacesDataset,
     "quickstart": QuickstartDataset,
     "quickstart-geo": QuickstartGeoDataset,
     "quickstart-video": QuickstartVideoDataset,
     "quickstart-groups": QuickstartGroupsDataset,
     "sama-coco": SamaCOCODataset,
     "ucf101": UCF101Dataset,
-    "places": PlacesDataset,
 }