Skip to content

Commit

Permalink
Fix in ImageNet_txt (#302)
Browse files (browse the repository at this point in the history)
* Add image file extensions to the annotation file

* Remove image search in extractor

* Update changelog

Co-authored-by: Maxim Zhiltsov <[email protected]>
  • Loading branch information
yasakova-anastasia and Maxim Zhiltsov authored Jun 23, 2021
1 parent 8a5cfd1 commit 7e35c81
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 20 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed
- Tensorflow AVX check is made optional in API and is disabled by default (<https://github.com/openvinotoolkit/datumaro/pull/305>)
- Image file extensions are now mandatory in ImageNet_txt annotation files (<https://github.com/openvinotoolkit/datumaro/pull/302>)

### Deprecated
-
Expand Down
29 changes: 13 additions & 16 deletions datumaro/plugins/imagenet_txt_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
LabelCategories, AnnotationType, SourceExtractor, Importer
)
from datumaro.components.converter import Converter
from datumaro.util.image import find_images


class ImagenetTxtPath:
Expand Down Expand Up @@ -49,26 +48,22 @@ def _load_categories(self, labels):
def _load_items(self, path):
items = {}

image_dir = self.image_dir
if osp.isdir(image_dir):
images = { osp.splitext(osp.relpath(p, image_dir))[0]: p
for p in find_images(image_dir, recursive=True) }
else:
images = {}

with open(path, encoding='utf-8') as f:
for line in f:
item = line.split('\"')
if 1 < len(item):
if len(item) == 3:
item_id = item[1]
label_ids = [int(id) for id in item[2].split()]
item = item[2].split()
image = item_id + item[0]
label_ids = [int(id) for id in item[1:]]
else:
raise Exception("Line %s: unexpected number "
"of quotes in filename" % line)
else:
item = line.split()
item_id = item[0]
item_id = osp.splitext(item[0])[0]
image = item[0]
label_ids = [int(id) for id in item[1:]]

anno = []
Expand All @@ -79,7 +74,7 @@ def _load_items(self, path):
anno.append(Label(label))

items[item_id] = DatasetItem(id=item_id, subset=self._subset,
image=images.get(item_id), annotations=anno)
image=osp.join(self.image_dir, image), annotations=anno)

return items

Expand All @@ -105,18 +100,20 @@ def apply(self):

labels = {}
for item in subset:
labels[item.id] = set(p.label for p in item.annotations
item_id = item.id
if 1 < len(item_id.split()):
item_id = '\"' + item_id + '\"'
item_id += self._find_image_ext(item)
labels[item_id] = set(p.label for p in item.annotations
if p.type == AnnotationType.label)

if self._save_images and item.has_image:
self._save_image(item, subdir=ImagenetTxtPath.IMAGE_DIR)

annotation = ''
for item_id, item_labels in labels.items():
if 1 < len(item_id.split()):
item_id = '\"' + item_id + '\"'
annotation += '%s %s\n' % (
item_id, ' '.join(str(l) for l in item_labels))
annotation += '%s %s\n' % (item_id,
' '.join(str(l) for l in item_labels))

with open(annotation_file, 'w', encoding='utf-8') as f:
f.write(annotation)
Expand Down
8 changes: 4 additions & 4 deletions tests/assets/imagenet_txt_dataset/train.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
1 0
2 5
3 3
4 5
1.jpg 0
2.jpg 5
3.jpg 3
4.jpg 5

0 comments on commit 7e35c81

Please sign in to comment.