* format
* fix root_validator error
* enforce pydantic >2.0.0 in setup.py
* fix test errors and warnings
* fix issues found by mypy
* use model_dump in calibration
* remove type ignore comments
* split airo-dataset-tools readme
* remove .dict() and change tests

Co-authored-by: tlips <[email protected]>
1 parent 7794633 · commit c15625b · 19 changed files with 282 additions and 83 deletions
@@ -1,30 +1,17 @@
# airo-dataset-tools
Package for working with datasets.

[COCO](https://cocodataset.org/#format-data) is the preferred format for computer vision datasets. We strictly follow their data format with two exceptions: segmentation masks are not required for instance datasets, and neither bounding boxes nor segmentation masks are required for keypoint datasets. This limits the labeling effort for real-world datasets, where you don't always need all annotation types.

Other formats are added when they are needed for dataset creation (think the format of a labeling tool) or for consumption of the dataset (think the YOLO format for training an object detector). Besides datasets, we also provide tools for other persistent data such as camera intrinsics and extrinsics.

As always, we try to reuse existing tools/code as much as possible, but we have found that keypoints are not nearly as well supported as segmentation or detection, so we had to write some custom tooling for working with keypoints.

## Data Parsers
The functionality is mainly provided in the form of [Pydantic](https://docs.pydantic.dev/) parsers that can be used to load or create data(sets). The parsers can be found in the `data_parsers` folder.

Available data parsers:
* [COCO datasets](https://cocodataset.org/#format-data)
* [CVAT 1.1 Images annotations](https://opencv.github.io/cvat/docs/manual/advanced/xml_format/)
* [Pose format](docs/pose.md)
* [Camera intrinsics format](docs/camera_intrinsics.md)

## COCO dataset creation
We provide a [documented](airo_dataset_tools/cvat_labeling/readme.md) workflow for labeling real-world data with CVAT and for creating [COCO](https://cocodataset.org/#format-data) keypoints or instance datasets from these annotations.

We also provide a number of tools for working with COCO datasets:
- visualisation using [FiftyOne](https://voxel51.com/)
- applying Albumentations transforms (e.g. resizing, flipping, ...) to a COCO keypoints dataset and its annotations, see [here](airo_dataset_tools/coco_tools/transform_dataset.py)
- converting COCO instances to YOLO format, see [here](airo_dataset_tools/coco_tools/coco_instances_to_yolo.py)
- combining COCO datasets (via datumaro) (TODO)

Most of the COCO tools are available in the CLI, which you can access by running `airo-dataset-tools --help` from the command line.

Tools for working with datasets.
They fall into two categories:

[**COCO related tools**](airo_dataset_tools/coco_tools/README.md):
* COCO dataset loading (and creation)
* FiftyOne visualisation
* Albumentations transforms
* COCO to YOLO conversion
* CVAT labeling workflow

[**Data formats**](airo_dataset_tools/data_parsers/README.md):
* 3D poses
* Camera intrinsics

> [Pydantic](https://docs.pydantic.dev/latest/) is used heavily throughout this package.
> It allows you to easily create Python objects that can be saved and loaded to and from JSON files.
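A minimal sketch of that JSON round trip with Pydantic v2 (the `CameraResolution` model below is a hypothetical example, not part of the package):

```python
from pydantic import BaseModel


class CameraResolution(BaseModel):
    """Hypothetical example model; not part of airo-dataset-tools."""

    width: int
    height: int


resolution = CameraResolution(width=1920, height=1080)

# Serialize to a JSON string, then parse it back into an equal object.
json_string = resolution.model_dump_json()
restored = CameraResolution.model_validate_json(json_string)
print(restored == resolution)  # True: Pydantic models compare by field values
```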
airo-dataset-tools/airo_dataset_tools/coco_tools/README.md: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
# COCO

Tools for working with COCO datasets.
Most of the COCO tools are available in the CLI, which you can access by running `airo-dataset-tools --help` from the command line.

Overview of the functionality:
* [COCO dataset loading](#dataset-loading)
* COCO dataset creation (e.g. with synthetic data or CVAT)
* [CVAT labeling workflow](../../airo_dataset_tools/cvat_labeling/readme.md)
* FiftyOne visualisation (see CLI)
* applying Albumentations transforms (e.g. resizing, flipping, ...) to a COCO keypoints dataset and its annotations, see [transform_dataset.py](transform_dataset.py)
* converting COCO instances to YOLO format, see [coco_instances_to_yolo.py](coco_instances_to_yolo.py)

## Dataset loading
We provide two main dataset [classes](./airo_dataset_tools/data_parsers/coco.py) for working with COCO:
* `CocoInstancesDataset`: for COCO datasets without keypoints
* `CocoKeypointsDataset`: for COCO datasets with keypoints

You can read more about the COCO dataset format [here](https://cocodataset.org/#format-data).
It is our preferred format for computer vision datasets.

Loading a COCO dataset can be done as follows:
```python
from airo_dataset_tools.data_parsers.coco import CocoInstancesDataset

with open("./test/test_data.instances_val2017_small.json", "r") as file:
    dataset = CocoInstancesDataset.model_validate_json(file.read())

print(len(dataset.images))
print(len(dataset.annotations))
```
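Because these classes are Pydantic models, a malformed annotation file is rejected at load time with a `ValidationError`. A sketch of that behaviour with a hypothetical, heavily simplified COCO-like schema (not the package's actual models):

```python
from pydantic import BaseModel, ValidationError


class TinyImage(BaseModel):
    """Hypothetical, heavily simplified stand-in for a COCO image entry."""

    id: int
    file_name: str


class TinyDataset(BaseModel):
    images: list[TinyImage]


# Well-formed JSON parses into typed objects.
good = '{"images": [{"id": 1, "file_name": "a.png"}]}'
dataset = TinyDataset.model_validate_json(good)

# An id that cannot be coerced to int raises a ValidationError.
bad = '{"images": [{"id": "not-a-number", "file_name": "b.png"}]}'
try:
    TinyDataset.model_validate_json(bad)
    rejected = False
except ValidationError:
    rejected = True
```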
## Notes

[COCO](https://cocodataset.org/#format-data) is the preferred format for computer vision datasets. We strictly follow their data format with two exceptions:
* segmentation masks are not required for instance datasets,
* neither bounding boxes nor segmentation masks are required for keypoint datasets.

This limits the labeling effort for real-world datasets, where you don't always need all annotation types.

Other formats are added when they are needed for dataset creation (think the format of a labeling tool) or for consumption of the dataset (think the YOLO format for training an object detector). Besides datasets, we also provide tools for other persistent data such as camera intrinsics and extrinsics.

As always, we try to reuse existing tools/code as much as possible, but we have found that keypoints are not nearly as well supported as segmentation or detection, so we had to write some custom tooling for working with keypoints.
airo-dataset-tools/airo_dataset_tools/data_parsers/README.md: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
# Data parsers
We call our Pydantic model classes *data parsers* because we use them to define, load and store data in a specific format.
For documentation of the COCO data parsers, see [here](../coco_tools/README.md).
The other data parsers we provide are:
* [Pose](../../docs/pose.md)
* [CameraIntrinsics](../../docs/camera_intrinsics.md)

## Example usage

:floppy_disk: **Creating a Pydantic model instance and saving it to JSON:**
```python
import json

import numpy as np

from airo_dataset_tools.data_parsers.pose import EulerAngles, Pose, Position

pose = Pose(
    position_in_meters=Position(x=1.0, y=2.0, z=3.0),
    rotation_euler_xyz_in_radians=EulerAngles(roll=np.pi / 4, pitch=-np.pi / 2, yaw=np.pi),
)

with open("pose.json", "w") as file:
    json.dump(pose.model_dump(exclude_none=True), file, indent=4)
```

:mag_right: **Loading a Pydantic model instance from JSON:**
```python
from airo_dataset_tools.data_parsers.pose import Pose

with open("pose.json", "r") as file:
    pose2 = Pose.model_validate_json(file.read())

x = pose2.position_in_meters.x
print(x)
```