tracel-ai · laggui · Nov 11, 2024 · Oct 19, 2024 · Oct 22, 2024 · Oct 30, 2024
diff --git a/crates/burn-dataset/src/vision/image_folder.rs b/crates/burn-dataset/src/vision/image_folder.rs
@@ -76,7 +76,7 @@ pub enum Annotation {
 #[derive(Debug, Clone, PartialEq)]
 pub struct SegmentationMask {
     /// Segmentation mask.
-    pub mask: Vec<usize>,
+    pub mask: Vec<PixelDepth>,
 }
 
 /// Object detection bounding box annotation.
@@ -104,7 +104,8 @@ pub struct ImageDatasetItem {
 enum AnnotationRaw {
     Label(String),
     MultiLabel(Vec<String>),
-    // TODO: bounding boxes and segmentation mask
+    SegmentationMask(PathBuf),
+    // TODO: bounding boxes
 }
 
 #[derive(Deserialize, Serialize, Debug, Clone)]
@@ -129,15 +130,77 @@ struct PathToImageDatasetItem {
     classes: HashMap<String, usize>,
 }
 
+/// Load an image from disk and flatten it into a vector of [`PixelDepth`] values.
+///
+/// The decoded buffer is iterated sample by sample, so the result holds one entry per
+/// channel of every pixel (the RGB test fixtures yield three entries per pixel).
+/// 8-bit color types map to `PixelDepth::U8`, 16-bit ones to `PixelDepth::U16` and
+/// 32-bit float ones to `PixelDepth::F32`.
+///
+/// # Arguments
+///
+/// * `image_path` - Path of the image file to load.
+///
+/// # Panics
+///
+/// Panics if the image cannot be opened or decoded, or if its color type is not one of
+/// the variants matched below.
+fn image_path_to_vec_pixel_depth(image_path: &Path) -> Vec<PixelDepth> {
+    // Load image from disk, naming the offending file if that fails
+    let image = image::open(image_path).unwrap_or_else(|err| {
+        panic!("Failed to open image {}: {}", image_path.display(), err)
+    });
+
+    // Image as Vec<PixelDepth>; the match is the tail expression, no temporary needed
+    match image.color() {
+        ColorType::L8 => image
+            .into_luma8()
+            .iter()
+            .map(|&x| PixelDepth::U8(x))
+            .collect(),
+        ColorType::La8 => image
+            .into_luma_alpha8()
+            .iter()
+            .map(|&x| PixelDepth::U8(x))
+            .collect(),
+        ColorType::L16 => image
+            .into_luma16()
+            .iter()
+            .map(|&x| PixelDepth::U16(x))
+            .collect(),
+        ColorType::La16 => image
+            .into_luma_alpha16()
+            .iter()
+            .map(|&x| PixelDepth::U16(x))
+            .collect(),
+        ColorType::Rgb8 => image
+            .into_rgb8()
+            .iter()
+            .map(|&x| PixelDepth::U8(x))
+            .collect(),
+        ColorType::Rgba8 => image
+            .into_rgba8()
+            .iter()
+            .map(|&x| PixelDepth::U8(x))
+            .collect(),
+        ColorType::Rgb16 => image
+            .into_rgb16()
+            .iter()
+            .map(|&x| PixelDepth::U16(x))
+            .collect(),
+        ColorType::Rgba16 => image
+            .into_rgba16()
+            .iter()
+            .map(|&x| PixelDepth::U16(x))
+            .collect(),
+        ColorType::Rgb32F => image
+            .into_rgb32f()
+            .iter()
+            .map(|&x| PixelDepth::F32(x))
+            .collect(),
+        ColorType::Rgba32F => image
+            .into_rgba32f()
+            .iter()
+            .map(|&x| PixelDepth::F32(x))
+            .collect(),
+        // Any other color type is deliberately unsupported.
+        _ => panic!("Unrecognized image color type"),
+    }
+}
+
 /// Parse the image annotation to the corresponding type.
 fn parse_image_annotation(
     annotation: &AnnotationRaw,
     classes: &HashMap<String, usize>,
 ) -> Annotation {
     // TODO: add support for other annotations
     // - [ ] Object bounding boxes
-    // - [ ] Segmentation mask
-    // For now, only image classification labels are supported.
+    // - [x] Segmentation mask
+    // For now, only image classification labels and segmentation are supported.
 
     // Map class string to label id
     match annotation {
@@ -148,6 +211,20 @@ fn parse_image_annotation(
                 .map(|name| *classes.get(name).unwrap())
                 .collect(),
         ),
+        AnnotationRaw::SegmentationMask(mask_path) => {
+            let mask_image = image_path_to_vec_pixel_depth(mask_path);
+            // The flattened buffer interleaves every channel of every pixel, so the
+            // channel count is the buffer length divided by the number of pixels.
+            // Deriving it (rather than hard-coding 3) keeps single-channel and
+            // alpha-carrying masks from being sub-sampled incorrectly.
+            let (width, height) = image::image_dimensions(mask_path)
+                .expect("mask image was just decoded, so its dimensions must be readable");
+            // u32 -> usize is a widening conversion on the 32/64-bit targets supported here.
+            let num_pixels = (width as usize) * (height as usize);
+            // `step_by(0)` panics, so fall back to 1 for an empty image.
+            let num_channels = mask_image
+                .len()
+                .checked_div(num_pixels)
+                .unwrap_or(1)
+                .max(1);
+            // assume that each channel in the mask image is the same and
+            // each pixel in the first channel corresponds to a class.
+            // multi-channel image segmentation is not supported at this time.
+            Annotation::SegmentationMask(SegmentationMask {
+                mask: mask_image.into_iter().step_by(num_channels).collect(),
+            })
+        }
     }
 }
 
@@ -160,6 +237,7 @@ impl Mapper<ImageDatasetItemRaw, ImageDatasetItem> for PathToImageDatasetItem {
         let image = image::open(&item.image_path).unwrap();
 
         // Image as Vec<PixelDepth>
+        // NOTE: this match is duplicated in `image_path_to_vec_pixel_depth`, which is also
+        // used to load segmentation masks; keep the two in sync.
         let img_vec = match image.color() {
             ColorType::L8 => image
                 .into_luma8()
@@ -401,6 +479,36 @@ impl ImageFolderDataset {
         Self::with_items(items, classes)
     }
 
+    /// Create an image segmentation dataset with the specified items.
+    ///
+    /// # Arguments
+    ///
+    /// * `items` - List of dataset items, each item represented by a tuple
+    ///   `(image path, segmentation mask path)`.
+    /// * `classes` - Dataset class names.
+    ///
+    /// # Returns
+    /// A new dataset instance.
+    ///
+    /// # Errors
+    /// Returns an error if an image path does not pass the extension check, or if
+    /// building the dataset from the parsed items fails. Only the image path is
+    /// checked here; the mask path is stored as given.
+    pub fn new_segmentation_with_items<P: AsRef<Path>, S: AsRef<str>>(
+        items: Vec<(P, P)>,
+        classes: &[S],
+    ) -> Result<Self, ImageLoaderError> {
+        // Parse items and check valid image extension types
+        let items = items
+            .into_iter()
+            .map(|(image_path, mask_path)| {
+                // Map image path and segmentation mask path
+                let image_path = image_path.as_ref();
+                let annotation = AnnotationRaw::SegmentationMask(mask_path.as_ref().to_path_buf());
+
+                // A missing or non-UTF-8 extension is validated as an empty string so that it
+                // is reported through the returned `Result` instead of panicking here.
+                // NOTE(review): relies on `check_extension` rejecting "" - confirm.
+                let extension = image_path
+                    .extension()
+                    .and_then(|ext| ext.to_str())
+                    .unwrap_or("");
+                Self::check_extension(&extension)?;
+
+                Ok(ImageDatasetItemRaw::new(image_path, annotation))
+            })
+            .collect::<Result<Vec<_>, _>>()?;
+
+        Self::with_items(items, classes)
+    }
+
     /// Create an image dataset with the specified items.
     ///
     /// # Arguments
@@ -451,6 +559,7 @@ impl ImageFolderDataset {
 mod tests {
     use super::*;
     const DATASET_ROOT: &str = "tests/data/image_folder";
+    const SEGMASK_ROOT: &str = "tests/data/segmask_folder";
 
     #[test]
     pub fn image_folder_dataset() {
@@ -611,4 +720,128 @@ mod tests {
             Annotation::MultiLabel(vec![0, 2])
         );
     }
+
+    #[test]
+    pub fn segmask_image_path_to_vec_pixel_depth() {
+        let root = Path::new(SEGMASK_ROOT);
+
+        // Both fixtures are 8x8 checkerboards stored with three samples per pixel.
+        // Cells where (row + column) is even hold `even_cell`, the others `odd_cell`;
+        // the expected buffer is the row-major concatenation of those cells.
+        let checkerboard = |even_cell: [u8; 3], odd_cell: [u8; 3]| -> Vec<PixelDepth> {
+            (0..8usize)
+                .flat_map(|row| (0..8usize).map(move |col| (row + col) % 2 == 0))
+                .flat_map(|is_even| if is_even { even_cell } else { odd_cell })
+                .map(PixelDepth::U8)
+                .collect()
+        };
+
+        // test checkerboard mask: class ids 1 and 2, repeated in every channel
+        assert_eq!(
+            checkerboard([1; 3], [2; 3]),
+            image_path_to_vec_pixel_depth(&root.join("annotations").join("mask_checkerboard.png")),
+        );
+
+        // checkerboard image
+        // TODO: investigate why the channels appear to be reversed, i.e. (blue, green, red)
+        // rather than (red, green, blue)
+        assert_eq!(
+            checkerboard([220, 20, 60], [0, 255, 255]),
+            image_path_to_vec_pixel_depth(&root.join("images").join("image_checkerboard.png")),
+        );
+    }
+
+    #[test]
+    pub fn segmask_folder_dataset() {
+        let root = Path::new(SEGMASK_ROOT);
+
+        // Pair every image with the annotation mask that shares its name.
+        let items: Vec<_> = ["checkerboard", "random_2colors", "random_3colors"]
+            .iter()
+            .map(|name| {
+                (
+                    root.join("images").join(format!("image_{}.png", name)),
+                    root.join("annotations").join(format!("mask_{}.png", name)),
+                )
+            })
+            .collect();
+        let dataset = ImageFolderDataset::new_segmentation_with_items(
+            items,
+            &[
+                "foo", // 0
+                "bar", // 1
+                "baz", // 2
+                "qux", // 3
+            ],
+        )
+        .unwrap();
+
+        // Dataset has 3 elements; each (image, annotation) is a single item
+        assert_eq!(dataset.len(), 3);
+        assert_eq!(dataset.get(3), None);
+
+        // Wrap an 8x8 grid of class ids (row-major, one id per pixel) into the
+        // annotation the dataset is expected to produce.
+        let expected = |rows: [[u8; 8]; 8]| {
+            Annotation::SegmentationMask(SegmentationMask {
+                mask: rows
+                    .iter()
+                    .flatten()
+                    .map(|&x| PixelDepth::U8(x))
+                    .collect(),
+            })
+        };
+
+        // checkerboard mask
+        const CHECKERBOARD_MASK: [[u8; 8]; 8] = [
+            [1, 2, 1, 2, 1, 2, 1, 2],
+            [2, 1, 2, 1, 2, 1, 2, 1],
+            [1, 2, 1, 2, 1, 2, 1, 2],
+            [2, 1, 2, 1, 2, 1, 2, 1],
+            [1, 2, 1, 2, 1, 2, 1, 2],
+            [2, 1, 2, 1, 2, 1, 2, 1],
+            [1, 2, 1, 2, 1, 2, 1, 2],
+            [2, 1, 2, 1, 2, 1, 2, 1],
+        ];
+        assert_eq!(
+            dataset.get(0).unwrap().annotation,
+            expected(CHECKERBOARD_MASK)
+        );
+
+        // random 2 colors mask
+        const RANDOM_2COLORS_MASK: [[u8; 8]; 8] = [
+            [1, 2, 1, 1, 1, 2, 1, 1],
+            [1, 2, 1, 1, 1, 1, 2, 1],
+            [2, 2, 2, 1, 2, 1, 2, 2],
+            [2, 2, 2, 2, 2, 2, 1, 1],
+            [2, 2, 2, 1, 2, 1, 1, 1],
+            [1, 1, 2, 2, 2, 2, 2, 1],
+            [2, 2, 1, 2, 1, 2, 1, 2],
+            [2, 1, 1, 1, 1, 1, 1, 1],
+        ];
+        assert_eq!(
+            dataset.get(1).unwrap().annotation,
+            expected(RANDOM_2COLORS_MASK)
+        );
+
+        // random 3 colors mask
+        const RANDOM_3COLORS_MASK: [[u8; 8]; 8] = [
+            [3, 1, 3, 3, 1, 1, 3, 2],
+            [3, 3, 3, 3, 1, 3, 2, 1],
+            [2, 2, 2, 2, 1, 1, 2, 2],
+            [1, 1, 1, 3, 3, 3, 2, 3],
+            [2, 2, 3, 2, 3, 3, 1, 3],
+            [1, 3, 3, 1, 1, 3, 2, 1],
+            [2, 2, 2, 1, 2, 1, 2, 3],
+            [3, 1, 3, 3, 2, 1, 2, 2],
+        ];
+        assert_eq!(
+            dataset.get(2).unwrap().annotation,
+            expected(RANDOM_3COLORS_MASK)
+        );
+    }
 }
diff --git a/crates/burn-dataset/tests/data/segmask_folder/annotations/mask_checkerboard.png b/crates/burn-dataset/tests/data/segmask_folder/annotations/mask_checkerboard.png
diff --git a/crates/burn-dataset/tests/data/segmask_folder/annotations/mask_checkerboard.txt b/crates/burn-dataset/tests/data/segmask_folder/annotations/mask_checkerboard.txt
@@ -0,0 +1,8 @@
+1 2 1 2 1 2 1 2
+2 1 2 1 2 1 2 1
+1 2 1 2 1 2 1 2
+2 1 2 1 2 1 2 1
+1 2 1 2 1 2 1 2
+2 1 2 1 2 1 2 1
+1 2 1 2 1 2 1 2
+2 1 2 1 2 1 2 1
diff --git a/crates/burn-dataset/tests/data/segmask_folder/annotations/mask_random_2colors.png b/crates/burn-dataset/tests/data/segmask_folder/annotations/mask_random_2colors.png
diff --git a/crates/burn-dataset/tests/data/segmask_folder/annotations/mask_random_2colors.txt b/crates/burn-dataset/tests/data/segmask_folder/annotations/mask_random_2colors.txt
@@ -0,0 +1,8 @@
+1 2 1 1 1 2 1 1
+1 2 1 1 1 1 2 1
+2 2 2 1 2 1 2 2
+2 2 2 2 2 2 1 1
+2 2 2 1 2 1 1 1
+1 1 2 2 2 2 2 1
+2 2 1 2 1 2 1 2
+2 1 1 1 1 1 1 1
diff --git a/crates/burn-dataset/tests/data/segmask_folder/annotations/mask_random_3colors.png b/crates/burn-dataset/tests/data/segmask_folder/annotations/mask_random_3colors.png
diff --git a/crates/burn-dataset/tests/data/segmask_folder/annotations/mask_random_3colors.txt b/crates/burn-dataset/tests/data/segmask_folder/annotations/mask_random_3colors.txt
@@ -0,0 +1,8 @@
+3 1 3 3 1 1 3 2
+3 3 3 3 1 3 2 1
+2 2 2 2 1 1 2 2
+1 1 1 3 3 3 2 3
+2 2 3 2 3 3 1 3
+1 3 3 1 1 3 2 1
+2 2 2 1 2 1 2 3
+3 1 3 3 2 1 2 2
diff --git a/crates/burn-dataset/tests/data/segmask_folder/images/image_checkerboard.png b/crates/burn-dataset/tests/data/segmask_folder/images/image_checkerboard.png
diff --git a/crates/burn-dataset/tests/data/segmask_folder/images/image_random_2colors.png b/crates/burn-dataset/tests/data/segmask_folder/images/image_random_2colors.png
diff --git a/crates/burn-dataset/tests/data/segmask_folder/images/image_random_3colors.png b/crates/burn-dataset/tests/data/segmask_folder/images/image_random_3colors.png