
Commit

linting
brimoor committed Mar 7, 2024
1 parent 1ad42a0 commit a1fcfc8
Showing 3 changed files with 149 additions and 143 deletions.
114 changes: 57 additions & 57 deletions docs/source/user_guide/dataset_zoo/datasets.rst
@@ -68,6 +68,8 @@ This page lists all of the datasets available in the Dataset Zoo.
+--------------------------------------------------------------------+---------------------------------------------------------------------------+
| :ref:`Open Images V7 <dataset-zoo-open-images-v7>` | image, classification, detection, segmentation, keypoints, relationships |
+--------------------------------------------------------------------+---------------------------------------------------------------------------+
| :ref:`Places <dataset-zoo-places>` | image, classification |
+--------------------------------------------------------------------+---------------------------------------------------------------------------+
| :ref:`Quickstart <dataset-zoo-quickstart>` | image, quickstart |
+--------------------------------------------------------------------+---------------------------------------------------------------------------+
| :ref:`Quickstart Geo <dataset-zoo-quickstart-geo>` | image, location, quickstart |
@@ -84,8 +86,6 @@ This page lists all of the datasets available in the Dataset Zoo.
+--------------------------------------------------------------------+---------------------------------------------------------------------------+
| :ref:`VOC-2012 <dataset-zoo-voc-2012>` | image, detection |
+--------------------------------------------------------------------+---------------------------------------------------------------------------+
| :ref:`Places <dataset-zoo-places>` | image, classification |
+--------------------------------------------------------------------+---------------------------------------------------------------------------+

.. _dataset-zoo-activitynet-100:

@@ -3199,6 +3199,61 @@ Images V7 by passing them to
:alt: open-images-v7
:align: center

.. _dataset-zoo-places:

Places
------

Places is a scene recognition dataset of 10 million images comprising ~400
unique scene categories.

The images are labeled with scene semantic categories, comprising a large
and diverse list of the types of environments encountered in the world.

**Details**

- Dataset name: ``places``
- Dataset source: http://places2.csail.mit.edu/download-private.html
- Dataset size: 29 GB
- Tags: ``image, classification``
- Supported splits: ``train, validation, test``
- ZooDataset classes:
:class:`PlacesDataset <fiftyone.zoo.datasets.base.PlacesDataset>`

**Full split stats**

- Train split: 1,803,460 images, with between 3,068 and 5,000 per category
- Test split: 328,500 images, with 900 images per category
- Validation split: 36,500 images, with 100 images per category

**Example usage**

.. tabs::

  .. group-tab:: Python

    .. code-block:: python
        :linenos:

        import fiftyone as fo
        import fiftyone.zoo as foz

        dataset = foz.load_zoo_dataset("places", split="validation")

        session = fo.launch_app(dataset)

  .. group-tab:: CLI

    .. code-block:: shell

        fiftyone zoo datasets load places --split validation

        fiftyone app launch places-validation

.. image:: /images/dataset_zoo/places-validation.png
   :alt: places-validation
   :align: center
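
As an illustrative sketch (assuming that the ``max_samples``, ``shuffle``, and
``seed`` kwargs are forwarded to the Places importer, as with other zoo
datasets), you can also load a small random subset of a split:

.. code-block:: python
    :linenos:

    import fiftyone as fo
    import fiftyone.zoo as foz

    # Load 500 randomly chosen validation images
    # (assumes these kwargs are forwarded to the importer)
    dataset = foz.load_zoo_dataset(
        "places",
        split="validation",
        max_samples=500,
        shuffle=True,
        seed=51,
    )

    session = fo.launch_app(dataset)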

.. _dataset-zoo-quickstart:

Quickstart
@@ -3835,58 +3890,3 @@ contain annotations.
.. image:: /images/dataset_zoo/voc-2012-validation.png
:alt: voc-2012-validation
:align: center

.. _dataset-zoo-places:

Places
------

Places is a scene recognition dataset of 10 million images comprising ~400
unique scene categories.

The images are labeled with scene semantic categories, comprising a large
and diverse list of the types of environments encountered in the world.

**Details**

- Dataset name: ``places``
- Dataset source: http://places2.csail.mit.edu/download-private.html
- Dataset size: 29 GB
- Tags: ``image, classification``
- Supported splits: ``train, validation, test``
- ZooDataset classes:
:class:`PlacesDataset <fiftyone.zoo.datasets.base.PlacesDataset>`

**Full split stats**

- Train split: 1,803,460 images, with between 3,068 and 5,000 per category
- Test split: 328,500 images, with 900 images per category
- Validation split: 36,500 images, with 100 images per category

**Example usage**

.. tabs::

  .. group-tab:: Python

    .. code-block:: python
        :linenos:

        import fiftyone as fo
        import fiftyone.zoo as foz

        dataset = foz.load_zoo_dataset("places", split="validation")

        session = fo.launch_app(dataset)

  .. group-tab:: CLI

    .. code-block:: shell

        fiftyone zoo datasets load places --split validation

        fiftyone app launch places-validation

.. image:: /images/dataset_zoo/places-validation.png
   :alt: places-validation
   :align: center
60 changes: 28 additions & 32 deletions fiftyone/utils/places.py
@@ -1,8 +1,8 @@
"""
Utilities for working with the
`Places dataset <http://places2.csail.mit.edu/index.html>`.
`Places dataset <http://places2.csail.mit.edu/index.html>`_.
| Copyright 2024, Voxel51, Inc.
| Copyright 2017-2024, Voxel51, Inc.
| `voxel51.com <https://voxel51.com/>`_
|
"""
@@ -22,11 +22,7 @@
logger = logging.getLogger(__name__)


def download_places_dataset_split(
dataset_dir,
split,
raw_dir=None,
):
def download_places_dataset_split(dataset_dir, split, raw_dir=None):
"""Utility that downloads splits of the
`Places dataset <http://places2.csail.mit.edu/index.html>`.
@@ -46,7 +42,6 @@ def download_places_dataset_split(
- did_download: whether any content was downloaded (True) or if all
necessary files were already downloaded (False)
"""

if split not in _IMAGE_DOWNLOAD_LINKS:
raise ValueError(
"Unsupported split '%s'; supported values are %s"
@@ -194,36 +189,34 @@


class PlacesDatasetImporter(foud.LabeledImageDatasetImporter):
"""Base class for importing datasets in Places format.
See :class:`fiftyone.types.PlacesDataset` for format details.
"""Class for importing datasets written by
:meth:`download_places_dataset_split`.
Args:
dataset_dir: the dataset directory
shuffle (False): whether to randomly shuffle the order in which the
samples are imported
seed (None): a random seed to use when shuffling
max_samples (None): a maximum number of samples to load.
By default, all matching samples are loaded.
max_samples (None): a maximum number of samples to load. By default,
all samples are imported
"""

def __init__(
self,
dataset_dir=None,
dataset_dir,
shuffle=False,
seed=None,
max_samples=None,
):

super().__init__(
dataset_dir=dataset_dir,
shuffle=shuffle,
seed=seed,
max_samples=max_samples,
)

self._labels_map = None
self._images_map = None
self._labels_map = None
self._uuids = None
self._iter_uuids = None
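
A minimal sketch of constructing the importer directly, assuming that
``dataset_dir`` points at a split previously written by
``download_places_dataset_split()`` and that the importer can be handed to
``Dataset.add_importer()`` like any other labeled image importer:

import fiftyone as fo
import fiftyone.utils.places as foup

# Hypothetical directory produced by download_places_dataset_split()
importer = foup.PlacesDatasetImporter(
    "/tmp/fiftyone/places/validation",
    shuffle=True,
    seed=51,
    max_samples=100,
)

dataset = fo.Dataset()
dataset.add_importer(importer)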

@@ -243,16 +236,17 @@ def __iter__(self):
self._iter_uuids = iter(self._uuids)
return self

def __len__(self):
return len(self._uuids)

def __next__(self):
image_id = next(self._iter_uuids)
image_path = self._images_map[image_id]
uuid = os.path.basename(image_path)

if self._labels_map:
label = fol.Classification(
label=self._labels_map[os.path.basename(image_path)]
)
else:
label = None
label = self._labels_map.get(uuid, None)
if label is not None:
label = fol.Classification(label=label)

return image_path, None, label
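
For reference, a sketch of the labels mapping this lookup assumes:
``labels.json`` maps image filenames to scene labels (the filenames and
classes below are hypothetical):

# Hypothetical contents of labels.json, keyed by image filename
labels_map = {
    "Places365_val_00000001.jpg": "airfield",
    "Places365_val_00000002.jpg": "bamboo_forest",
}

# __next__() resolves each sample's label by filename; when no labels.json
# is present, every lookup returns None and samples are imported unlabeled
label = labels_map.get("Places365_val_00000001.jpg", None)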

@@ -269,22 +263,24 @@ def setup(self):
os.path.splitext(filename)[0]: os.path.join(data_dir, filename)
for filename in etau.list_files(data_dir)
}
available_ids = list(images_map.keys())

labels_path = os.path.join(labels_dir, "labels.json")
if os.path.isfile(labels_path):
labels_map = etas.load_json(labels_path)
else:
labels_map = {}

uuids = list(images_map.keys())

if self.shuffle:
random.shuffle(available_ids)
random.shuffle(uuids)

if self.max_samples is not None:
if not self.shuffle:
random.shuffle(available_ids)
available_ids = available_ids[: self.max_samples]
uuids = uuids[: self.max_samples]

self._uuids = available_ids
self._images_map = images_map
if os.path.exists(labels_dir):
self._labels_map = etas.load_json(
os.path.join(labels_dir, "labels.json")
)
self._labels_map = labels_map
self._uuids = uuids

@staticmethod
def _get_num_samples(dataset_dir):