Fix dataset formats (Cyrillic and spaces in filename) #148

Merged
merged 17 commits on Mar 17, 2021
15 changes: 10 additions & 5 deletions datumaro/plugins/camvid_format.py
@@ -6,6 +6,7 @@
import logging as log
import os
import os.path as osp
import re
from collections import OrderedDict
from enum import Enum

@@ -18,8 +19,7 @@
from datumaro.util import find, str_to_bool
from datumaro.util.annotation_util import make_label_id_mapping
from datumaro.util.image import save_image
from datumaro.util.mask_tools import lazy_mask, paint_mask, generate_colormap

from datumaro.util.mask_tools import generate_colormap, lazy_mask, paint_mask

CamvidLabelMap = OrderedDict([
('Void', (0, 0, 0)),
@@ -61,6 +61,7 @@ class CamvidPath:
SEGM_DIR = "annot"
IMAGE_EXT = '.jpg'
MASK_EXT = '.png'
PATTERN = re.compile(r'(.+[.]\S+) (.+[.]\S+)?')


def parse_label_map(path):
@@ -159,13 +160,17 @@ def _load_items(self, path):
items = {}
with open(path, encoding='utf-8') as f:
for line in f:
objects = line.split()
search = CamvidPath.PATTERN.search(line)
if search:
objects = CamvidPath.PATTERN.search(line).groups()
else:
raise Exception("Line %s: invalid path format" % line)
image = objects[0]
item_id = ('/'.join(image.split('/')[2:]))[:-len(CamvidPath.IMAGE_EXT)]
image_path = osp.join(self._dataset_dir,
(image, image[1:])[image[0] == '/'])
item_annotations = []
if 1 < len(objects):
if objects[1] is not None:
gt = objects[1]
gt_path = osp.join(self._dataset_dir,
(gt, gt[1:]) [gt[0] == '/'])
@@ -264,7 +269,7 @@ def save_segm_lists(self, subset_name, segm_list):
return

ann_file = osp.join(self._save_dir, subset_name + '.txt')
with open(ann_file, 'w') as f:
with open(ann_file, 'w', encoding='utf-8') as f:
for (image_path, mask_path) in segm_list.values():
f.write('/%s %s\n' % (
image_path.replace('\\', '/'),
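
For reference, a minimal sketch of how the new CamvidPath.PATTERN regex splits an annotation line when the paths contain spaces or Cyrillic characters; the file names below are invented for illustration:

```python
import re

# Same pattern as CamvidPath.PATTERN in the hunk above.
PATTERN = re.compile(r'(.+[.]\S+) (.+[.]\S+)?')

# Hypothetical line from <subset>.txt with spaces and Cyrillic in both paths.
line = '/train/кадр 0001.jpg /trainannot/кадр 0001.png\n'

search = PATTERN.search(line)
if search:
    image, gt = search.groups()
    # image -> '/train/кадр 0001.jpg'
    # gt    -> '/trainannot/кадр 0001.png'
else:
    raise Exception("Line %s: invalid path format" % line)
```

The previous line.split() would have produced four tokens for this line and broken both paths apart; the regex keeps each path intact by splitting only at the space after the first extension.
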
1 change: 0 additions & 1 deletion datumaro/plugins/imagenet_format.py
@@ -1,4 +1,3 @@

# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
24 changes: 19 additions & 5 deletions datumaro/plugins/imagenet_txt_format.py
@@ -49,9 +49,18 @@ def _load_items(self, path):
items = {}
with open(path, encoding='utf-8') as f:
for line in f:
item = line.split()
item_id = item[0]
label_ids = [int(id) for id in item[1:]]
item = line.split('\"')
if 1 < len(item):
if len(item) == 3:
item_id = item[1]
label_ids = [int(id) for id in item[2].split()]
else:
raise Exception("Line %s: unexpected number "
"of quotes in filename" % line)
else:
item = line.split()
item_id = item[0]
label_ids = [int(id) for id in item[1:]]
anno = []
for label in label_ids:
assert 0 <= label and \
@@ -95,9 +104,14 @@ def apply(self):
if self._save_images and item.has_image:
self._save_image(item, subdir=ImagenetTxtPath.IMAGE_DIR)

annotation = ''
for item_id, item_labels in labels.items():
if 1 < len(item_id.split()):
item_id = '\"' + item_id + '\"'
annotation += '%s %s\n' % (item_id, ' '.join(item_labels))

with open(annotation_file, 'w', encoding='utf-8') as f:
f.writelines(['%s %s\n' % (item_id, ' '.join(labels[item_id]))
for item_id in labels])
f.write(annotation)

labels_file = osp.join(subset_dir, ImagenetTxtPath.LABELS_FILE)
with open(labels_file, 'w', encoding='utf-8') as f:
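
To make the quoting scheme concrete, here is a hedged sketch of a writer/reader round trip for an item id containing spaces; the id and labels are made up:

```python
# Writer side: ids with spaces are wrapped in double quotes.
item_id = 'my фото 1'
item_labels = ['0', '5']
if 1 < len(item_id.split()):
    item_id = '"' + item_id + '"'
line = '%s %s\n' % (item_id, ' '.join(item_labels))
# line == '"my фото 1" 0 5\n'

# Reader side: splitting on the quote character yields exactly three parts
# for a well-formed line; ids without quotes fall back to a plain split().
parts = line.split('"')                            # ['', 'my фото 1', ' 0 5\n']
assert len(parts) == 3
item_id = parts[1]                                 # 'my фото 1'
label_ids = [int(i) for i in parts[2].split()]     # [0, 5]
```
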
46 changes: 35 additions & 11 deletions datumaro/plugins/lfw_format.py
@@ -31,10 +31,14 @@ def _load_items(self, path):
images_dir = osp.join(self._dataset_dir, self._subset, LfwPath.IMAGES_DIR)
with open(path, encoding='utf-8') as f:
for line in f:
pair = line.strip().split()
pair = line.strip().split('\t')
if len(pair) == 3:
image1 = self.get_image_name(pair[0], pair[1])
image2 = self.get_image_name(pair[0], pair[2])
if pair[0] == '-':
image1 = pair[1]
image2 = pair[2]
else:
image1 = self.get_image_name(pair[0], pair[1])
image2 = self.get_image_name(pair[0], pair[2])
if image1 not in items:
items[image1] = DatasetItem(id=image1, subset=self._subset,
image=osp.join(images_dir, image1 + LfwPath.IMAGE_EXT),
@@ -47,8 +51,14 @@ def _load_items(self, path):
attributes = items[image1].attributes
attributes['positive_pairs'].append(image2)
elif len(pair) == 4:
image1 = self.get_image_name(pair[0], pair[1])
image2 = self.get_image_name(pair[2], pair[3])
if pair[0] == '-':
image1 = pair[1]
else:
image1 = self.get_image_name(pair[0], pair[1])
if pair[2] == '-':
image2 = pair[3]
else:
image2 = self.get_image_name(pair[2], pair[3])
if image1 not in items:
items[image1] = DatasetItem(id=image1, subset=self._subset,
image=osp.join(images_dir, image1 + LfwPath.IMAGE_EXT),
@@ -102,17 +112,31 @@ def apply(self):
self._save_image(item, osp.join(self._save_dir, subset_name,
LfwPath.IMAGES_DIR, item.id + LfwPath.IMAGE_EXT))

person1, num1 = LfwPath.PATTERN.search(item.id).groups()
num1 = int(num1)
search = LfwPath.PATTERN.search(item.id)
if search:
person1, num1 = search.groups()
num1 = int(num1)
else:
person1 = '-'
num1 = item.id
if 'positive_pairs' in item.attributes:
for pair in item.attributes['positive_pairs']:
num2 = LfwPath.PATTERN.search(pair).groups()[1]
num2 = int(num2)
search = LfwPath.PATTERN.search(pair)
if search:
num2 = search.groups()[1]
num2 = int(num2)
else:
num2 = pair
positive_pairs.append('%s\t%s\t%s' % (person1, num1, num2))
if 'negative_pairs' in item.attributes:
for pair in item.attributes['negative_pairs']:
person2, num2 = LfwPath.PATTERN.search(pair).groups()
num2 = int(num2)
search = LfwPath.PATTERN.search(pair)
if search:
person2, num2 = search.groups()
num2 = int(num2)
else:
person2 = '-'
num2 = pair
negative_pairs.append('%s\t%s\t%s\t%s' % \
(person1, num1, person2, num2))

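
A small sketch of the pair lines this produces: ids that match LfwPath.PATTERN keep the usual person/number form, while free-form ids (spaces, Cyrillic, no trailing number) are written with '-' as the person and the raw id in the number column. The ids and the name_0001-style layout below are assumptions for illustration:

```python
lines = [
    'Aaron_Peirsol\t1\t2',        # standard LFW naming, positive pair
    '-\tкадр 1\tкадр 2',          # free-form ids, positive pair
]

for line in lines:
    pair = line.strip().split('\t')
    if len(pair) == 3:
        if pair[0] == '-':
            image1, image2 = pair[1], pair[2]
        else:
            # Stand-in for get_image_name(); assumes the usual
            # <person>/<person>_<number> LFW image layout.
            image1 = '%s/%s_%04d' % (pair[0], pair[0], int(pair[1]))
            image2 = '%s/%s_%04d' % (pair[0], pair[0], int(pair[2]))
```

Splitting on '\t' rather than arbitrary whitespace is what lets the free-form ids carry spaces.
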
8 changes: 4 additions & 4 deletions datumaro/plugins/vgg_face2_format.py
@@ -68,7 +68,7 @@ def _split_item_path(path):

items = {}

with open(path) as content:
with open(path, encoding='utf-8') as content:
landmarks_table = list(csv.DictReader(content))
for row in landmarks_table:
item_id = row['NAME_ID']
@@ -96,7 +96,7 @@ def _split_item_path(path):
bboxes_path = osp.join(self._dataset_dir, VggFace2Path.ANNOTATION_DIR,
VggFace2Path.BBOXES_FILE + self._subset + '.csv')
if osp.isfile(bboxes_path):
with open(bboxes_path) as content:
with open(bboxes_path, encoding='utf-8') as content:
bboxes_table = list(csv.DictReader(content))
for row in bboxes_table:
item_id = row['NAME_ID']
@@ -221,7 +221,7 @@ def apply(self):
landmarks_path = osp.join(save_dir, VggFace2Path.ANNOTATION_DIR,
VggFace2Path.LANDMARKS_FILE + subset_name + '.csv')
os.makedirs(osp.dirname(landmarks_path), exist_ok=True)
with open(landmarks_path, 'w', newline='') as file:
with open(landmarks_path, 'w', encoding='utf-8', newline='') as file:
columns = ['NAME_ID', 'P1X', 'P1Y', 'P2X', 'P2Y',
'P3X', 'P3Y', 'P4X', 'P4Y', 'P5X', 'P5Y']
writer = csv.DictWriter(file, fieldnames=columns)
@@ -232,7 +232,7 @@
bboxes_path = osp.join(save_dir, VggFace2Path.ANNOTATION_DIR,
VggFace2Path.BBOXES_FILE + subset_name + '.csv')
os.makedirs(osp.dirname(bboxes_path), exist_ok=True)
with open(bboxes_path, 'w', newline='') as file:
with open(bboxes_path, 'w', encoding='utf-8', newline='') as file:
columns = ['NAME_ID', 'X', 'Y', 'W', 'H']
writer = csv.DictWriter(file, fieldnames=columns)
writer.writeheader()
24 changes: 16 additions & 8 deletions datumaro/plugins/voc_format/converter.py
@@ -297,7 +297,7 @@ def save_subsets(self):
VocTask.action_classification}:
ann_path = osp.join(self._ann_dir, item.id + '.xml')
os.makedirs(osp.dirname(ann_path), exist_ok=True)
with open(ann_path, 'w') as f:
with open(ann_path, 'w', encoding='utf-8') as f:
f.write(ET.tostring(root_elem,
encoding='unicode', pretty_print=True))

@@ -351,7 +351,7 @@ def save_subsets(self):
@staticmethod
def _get_filtered_lines(path, patch, subset, items=None):
lines = {}
with open(path) as f:
with open(path, encoding='utf-8') as f:
for line in f:
item, text, _ = line.split(maxsplit=1) + ['', '']
if not patch or patch.updated_items.get((item, subset)) != \
@@ -368,8 +368,10 @@ def save_action_lists(self, subset_name, action_list):
items = {k: True for k in action_list}
if self._patch and osp.isfile(ann_file):
self._get_filtered_lines(ann_file, self._patch, subset_name, items)
with open(ann_file, 'w') as f:
with open(ann_file, 'w', encoding='utf-8') as f:
for item in items:
# if 1 < len(item.split()):
# item = '\"' + item + '\"'
f.write('%s\n' % item)

if not items and not self._patch:
@@ -393,7 +395,7 @@ def _write_item(f, item, objs, action):
if self._patch and osp.isfile(ann_file):
lines = self._get_filtered_lines(ann_file, None, subset_name)

with open(ann_file, 'w') as f:
with open(ann_file, 'w', encoding='utf-8') as f:
for item in items:
if item in action_list:
_write_item(f, item, action_list[item], action)
@@ -419,7 +421,7 @@ def _write_item(f, item, item_labels):
lines = self._get_filtered_lines(ann_file, self._patch,
subset_name, items)

with open(ann_file, 'w') as f:
with open(ann_file, 'w', encoding='utf-8') as f:
for item in items:
if item in class_lists:
_write_item(f, item, class_lists[item])
@@ -434,8 +436,10 @@ def save_clsdet_lists(self, subset_name, clsdet_list):
if self._patch and osp.isfile(ann_file):
self._get_filtered_lines(ann_file, self._patch, subset_name, items)

with open(ann_file, 'w') as f:
with open(ann_file, 'w', encoding='utf-8') as f:
for item in items:
# if 1 < len(item.split()):
# item = '\"' + item + '\"'
f.write('%s\n' % item)

def save_segm_lists(self, subset_name, segm_list):
@@ -446,12 +450,16 @@ def save_segm_lists(self, subset_name, segm_list):
if self._patch and osp.isfile(ann_file):
self._get_filtered_lines(ann_file, self._patch, subset_name, items)

with open(ann_file, 'w') as f:
with open(ann_file, 'w', encoding='utf-8') as f:
for item in items:
# if 1 < len(item.split()):
# item = '\"' + item + '\"'
f.write('%s\n' % item)

def save_layout_lists(self, subset_name, layout_list):
def _write_item(f, item, item_layouts):
# if 1 < len(item.split()):
# item = '\"' + item + '\"'
if item_layouts:
for obj_id in item_layouts:
f.write('%s % d\n' % (item, 1 + obj_id))
@@ -466,7 +474,7 @@ def _write_item(f, item, item_layouts):
if self._patch and osp.isfile(ann_file):
self._get_filtered_lines(ann_file, self._patch, subset_name, items)

with open(ann_file, 'w') as f:
with open(ann_file, 'w', encoding='utf-8') as f:
for item in items:
if item in layout_list:
_write_item(f, item, layout_list[item])
16 changes: 12 additions & 4 deletions datumaro/plugins/voc_format/extractor.py
@@ -58,8 +58,14 @@ def _load_categories(dataset_path):

@staticmethod
def _load_subset_list(subset_path):
with open(subset_path) as f:
return [line.split()[0] for line in f]
subset_list = []
with open(subset_path, encoding='utf-8') as f:
for line in f:
line = line.strip().split()
if 2 < len(line):
line[0] = ' '.join(line[i] for i in range(len(line)))
subset_list.append(line[0])
return subset_list

class VocClassificationExtractor(_VocExtractor):
def __iter__(self):
@@ -78,11 +84,13 @@ def _load_annotations(self):
anno_files = [s for s in dir_items(task_dir, '.txt')
if s.endswith('_' + osp.basename(self._path))]
for ann_filename in anno_files:
with open(osp.join(task_dir, ann_filename)) as f:
with open(osp.join(task_dir, ann_filename), encoding='utf-8') as f:
label = ann_filename[:ann_filename.rfind('_')]
label_id = self._get_label_id(label)
for line in f:
item, present = line.split()
objects = line.split()
item = ' '.join(objects[i] for i in range(len(objects) - 1))
present = objects[-1]
if present == '1':
annotations[item].append(label_id)

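
A rough sketch of how ImageSets/Main/<label>_<subset>.txt lines are split when the item id itself contains spaces: everything except the last token is treated as the id, and the last token as the presence flag. The id below is hypothetical:

```python
line = '2011 кадр 01 1\n'
objects = line.split()
item = ' '.join(objects[i] for i in range(len(objects) - 1))   # '2011 кадр 01'
present = objects[-1]                                           # '1'
if present == '1':
    pass  # the label from the file name is attached to this item
```

Note that rejoining with single spaces assumes ids never contain consecutive spaces.
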
4 changes: 2 additions & 2 deletions datumaro/plugins/widerface_format.py
@@ -62,7 +62,7 @@ def _load_categories(self):
def _load_items(self, path):
items = {}

with open(path, 'r') as f:
with open(path, 'r', encoding='utf-8') as f:
lines = f.readlines()

image_ids = [image_id for image_id, line in enumerate(lines)
@@ -178,5 +178,5 @@ def apply(self):
annotation_path = osp.join(save_dir, WiderFacePath.ANNOTATIONS_DIR,
'wider_face_' + subset_name + '_bbx_gt.txt')
os.makedirs(osp.dirname(annotation_path), exist_ok=True)
with open(annotation_path, 'w') as f:
with open(annotation_path, 'w', encoding='utf-8') as f:
f.write(wider_annotation)
8 changes: 4 additions & 4 deletions datumaro/plugins/yolo_format/converter.py
@@ -39,7 +39,7 @@ def apply(self):
label_categories = extractor.categories()[AnnotationType.label]
label_ids = {label.name: idx
for idx, label in enumerate(label_categories.items)}
with open(osp.join(save_dir, 'obj.names'), 'w') as f:
with open(osp.join(save_dir, 'obj.names'), 'w', encoding='utf-8') as f:
f.writelines('%s\n' % l[0]
for l in sorted(label_ids.items(), key=lambda x: x[1]))

Expand Down Expand Up @@ -88,15 +88,15 @@ def apply(self):

annotation_path = osp.join(subset_dir, '%s.txt' % item.id)
os.makedirs(osp.dirname(annotation_path), exist_ok=True)
with open(annotation_path, 'w') as f:
with open(annotation_path, 'w', encoding='utf-8') as f:
f.write(yolo_annotation)

subset_list_name = '%s.txt' % subset_name
subset_lists[subset_name] = subset_list_name
with open(osp.join(save_dir, subset_list_name), 'w') as f:
with open(osp.join(save_dir, subset_list_name), 'w', encoding='utf-8') as f:
f.writelines('%s\n' % s for s in image_paths.values())

with open(osp.join(save_dir, 'obj.data'), 'w') as f:
with open(osp.join(save_dir, 'obj.data'), 'w', encoding='utf-8') as f:
f.write('classes = %s\n' % len(label_ids))

for subset_name, subset_list_name in subset_lists.items():
Expand Down
Loading