diff --git a/datumaro/plugins/imagenet_format.py b/datumaro/plugins/imagenet_format.py index 0e0669a9db..7235553fd2 100644 --- a/datumaro/plugins/imagenet_format.py +++ b/datumaro/plugins/imagenet_format.py @@ -15,7 +15,8 @@ class ImagenetPath: - IMAGES_EXT = '.jpg' + DEFAULT_IMAGE_EXT = '.jpg' + IMAGE_EXT_FORMAT = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif'] IMAGES_DIR_NO_LABEL = 'no_label' @@ -37,7 +38,7 @@ def _load_categories(self, path): def _load_items(self, path): items = {} for image_path in glob(osp.join(path, '*', '*')): - if osp.splitext(image_path)[1] != ImagenetPath.IMAGES_EXT: + if osp.splitext(image_path)[1] not in ImagenetPath.IMAGE_EXT_FORMAT: continue label = osp.basename(osp.dirname(image_path)) image_name = osp.splitext(osp.basename(image_path))[0][len(label) + 1:] @@ -62,7 +63,7 @@ def find_sources(cls, path): class ImagenetConverter(Converter): - DEFAULT_IMAGE_EXT = ImagenetPath.IMAGES_EXT + DEFAULT_IMAGE_EXT = ImagenetPath.DEFAULT_IMAGE_EXT def apply(self): if 1 < len(self._extractor.subsets()): @@ -79,12 +80,10 @@ def apply(self): for label in labels[image_name]: label_name = extractor.categories()[AnnotationType.label][label].name self._save_image(item, osp.join(subset_dir, label_name, - '%s_%s%s' % \ - (label_name, image_name, ImagenetPath.IMAGES_EXT) - )) + '%s_%s' % (label_name, self._make_image_filename(item)))) if not labels[image_name]: self._save_image(item, osp.join(subset_dir, ImagenetPath.IMAGES_DIR_NO_LABEL, - ImagenetPath.IMAGES_DIR_NO_LABEL + '_' + - image_name + ImagenetPath.IMAGES_EXT)) + ImagenetPath.IMAGES_DIR_NO_LABEL + '_' + + self._make_image_filename(item))) diff --git a/datumaro/plugins/imagenet_txt_format.py b/datumaro/plugins/imagenet_txt_format.py index 00ee4ae789..a2bb22df3b 100644 --- a/datumaro/plugins/imagenet_txt_format.py +++ b/datumaro/plugins/imagenet_txt_format.py @@ -14,6 +14,8 @@ class ImagenetTxtPath: + DEFAULT_IMAGE_EXT = '.jpg' + IMAGE_EXT_FORMAT = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif'] LABELS_FILE = 'synsets.txt' IMAGE_DIR = 'images' @@ -56,9 +58,14 @@ def _load_items(self, path): label < len(self._categories[AnnotationType.label]), \ "Image '%s': unknown label id '%s'" % (item_id, label) anno.append(Label(label)) + image_path = osp.join(self.image_dir, item_id + + ImagenetTxtPath.DEFAULT_IMAGE_EXT) + for path in glob(osp.join(self.image_dir, item_id + '*')): + if osp.splitext(path)[1] in ImagenetTxtPath.IMAGE_EXT_FORMAT: + image_path = path + break items[item_id] = DatasetItem(id=item_id, subset=self._subset, - image=osp.join(self.image_dir, item_id + '.jpg'), - annotations=anno) + image=image_path, annotations=anno) return items @@ -75,7 +82,7 @@ def find_sources(cls, path): class ImagenetTxtConverter(Converter): - DEFAULT_IMAGE_EXT = '.jpg' + DEFAULT_IMAGE_EXT = ImagenetTxtPath.DEFAULT_IMAGE_EXT def apply(self): subset_dir = self._save_dir