From 5f0e16089ba33f0129e2508ea6da4d90dd1e29d5 Mon Sep 17 00:00:00 2001
From: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
Date: Thu, 30 Apr 2020 14:29:16 +0300
Subject: [PATCH] Defer requesting image info in the YOLO extractor

---
 datumaro/datumaro/components/project.py       |  2 +-
 .../datumaro/plugins/yolo_format/extractor.py | 44 ++++++++++---------
 2 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/datumaro/datumaro/components/project.py b/datumaro/datumaro/components/project.py
index 84d282c9bb77..d4468edd9b95 100644
--- a/datumaro/datumaro/components/project.py
+++ b/datumaro/datumaro/components/project.py
@@ -142,7 +142,7 @@ def load_project_as_dataset(url):
 
 class Environment:
     _builtin_plugins = None
-    PROJECT_EXTRACTOR_NAME = 'project'
+    PROJECT_EXTRACTOR_NAME = 'datumaro_project'
 
     def __init__(self, config=None):
         config = Config(config,
diff --git a/datumaro/datumaro/plugins/yolo_format/extractor.py b/datumaro/datumaro/plugins/yolo_format/extractor.py
index 11e829d4a5bd..5e2c61b3df6f 100644
--- a/datumaro/datumaro/plugins/yolo_format/extractor.py
+++ b/datumaro/datumaro/plugins/yolo_format/extractor.py
@@ -93,12 +93,6 @@ def __init__(self, config_path, image_info=None):
                     (osp.splitext(osp.basename(p.strip()))[0], p.strip())
                     for p in f
                 )
-
-            for item_id, image_path in subset.items.items():
-                image_path = self._make_local_path(image_path)
-                if not osp.isfile(image_path) and item_id not in image_info:
-                    raise Exception("Can't find image '%s'" % item_id)
-
             subsets[subset_name] = subset
 
         self._subsets = subsets
@@ -122,10 +116,9 @@ def _get(self, item_id, subset_name):
             image_path = self._make_local_path(item)
             image_size = self._image_info.get(item_id)
             image = Image(path=image_path, size=image_size)
-            h, w = image.size
 
             anno_path = osp.splitext(image_path)[0] + '.txt'
-            annotations = self._parse_annotations(anno_path, w, h)
+            annotations = self._parse_annotations(anno_path, image)
 
             item = DatasetItem(id=item_id, subset=subset_name,
                 image=image, annotations=annotations)
@@ -134,21 +127,30 @@ def _get(self, item_id, subset_name):
         return item
 
     @staticmethod
-    def _parse_annotations(anno_path, image_width, image_height):
+    def _parse_annotations(anno_path, image):
+        lines = []
         with open(anno_path, 'r') as f:
-            annotations = []
             for line in f:
-                label_id, xc, yc, w, h = line.strip().split()
-                label_id = int(label_id)
-                w = float(w)
-                h = float(h)
-                x = float(xc) - w * 0.5
-                y = float(yc) - h * 0.5
-                annotations.append(Bbox(
-                    round(x * image_width, 1), round(y * image_height, 1),
-                    round(w * image_width, 1), round(h * image_height, 1),
-                    label=label_id
-                ))
+                line = line.strip()
+                if line:
+                    lines.append(line)
+
+        annotations = []
+        if lines:
+            image_height, image_width = image.size # query the image size only when there are annotations to scale
+        for line in lines:
+            label_id, xc, yc, w, h = line.split()
+            label_id = int(label_id)
+            w = float(w)
+            h = float(h)
+            x = float(xc) - w * 0.5
+            y = float(yc) - h * 0.5
+            annotations.append(Bbox(
+                round(x * image_width, 1), round(y * image_height, 1),
+                round(w * image_width, 1), round(h * image_height, 1),
+                label=label_id
+            ))
+
         return annotations
 
     @staticmethod