Skip to content

Commit

Permalink
add stats property for Coco class (#70)
Browse files Browse the repository at this point in the history
* add stats property for Coco class

* add more stats

* add docs for stats property
  • Loading branch information
fcakyon authored May 6, 2021
1 parent 64bc28d commit 571e7a2
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 4 deletions.
32 changes: 32 additions & 0 deletions docs/COCO.md
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,35 @@ subsampled_coco = coco.get_subsampled_coco(subsample_ratio=10)
# export subsampled COCO dataset
save_json(subsampled_coco.json, "subsampled_coco.json")
```

## Get dataset stats:

```python
from sahi.utils.coco import Coco

# init Coco object
coco = Coco.from_coco_dict_or_path("coco.json")

# get dataset stats
coco.stats
{
'avg_annotation_area': 2448.405738278109,
'avg_num_annotations_in_image': 53.037243084530985,
'max_annotation_area': 328640,
'max_num_annotations_in_image': 902,
'min_annotation_area': 3,
'min_num_annotations_in_image': 1,
'num_annotations': 343204,
'num_annotations_per_category': {
'human': 106396,
'vehicle': 236808
},
'num_categories': 2,
'num_images': 6471,
'num_images_per_category': {
'human': 5684,
'vehicle': 6323
}
}

```
72 changes: 69 additions & 3 deletions sahi/utils/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import copy
import os
from collections import OrderedDict, defaultdict
from collections import Counter, OrderedDict, defaultdict
from dataclasses import dataclass
from multiprocessing import Pool
from pathlib import Path
Expand Down Expand Up @@ -737,7 +737,7 @@ def __repr__(self):


class Coco:
def __init__(self, name=None, image_dir=None, remapping_dict=None):
def __init__(self, name=None, image_dir=None, remapping_dict=None, ignore_negative_samples=True):
"""
Creates Coco object.
Expand All @@ -748,12 +748,16 @@ def __init__(self, name=None, image_dir=None, remapping_dict=None):
Base file directory that contains dataset images. Required for dataset merging.
remapping_dict: dict
{1:0, 2:1} maps category id 1 to 0 and category id 2 to 1
ignore_negative_samples: bool
If True ignores images without annotations in all operations.
"""
self.name = name
self.image_dir = image_dir
self.remapping_dict = remapping_dict
self.ignore_negative_samples = ignore_negative_samples
self.categories = []
self.images = []
self._stats = None

def add_categories_from_coco_category_list(self, coco_category_list):
"""
Expand Down Expand Up @@ -998,9 +1002,71 @@ def json(self):
return create_coco_dict(
images=self.images,
categories=self.json_categories,
ignore_negative_samples=True,
ignore_negative_samples=self.ignore_negative_samples,
)

@property
def stats(self):
    """
    Returns the dataset statistics dict.

    The statistics are computed by calculate_stats() the first time this
    property is read (while self._stats is still empty) and the stored
    result is returned on later reads.
    """
    if self._stats:
        return self._stats
    self.calculate_stats()
    return self._stats

def calculate_stats(self):
    """
    Iterates over all images and their annotations and stores the dataset
    statistics in self._stats.

    The stored dict contains:
        num_images, num_annotations, num_categories,
        num_images_per_category, num_annotations_per_category,
        min_num_annotations_in_image, max_num_annotations_in_image,
        avg_num_annotations_in_image,
        min_annotation_area, max_annotation_area, avg_annotation_area

    Every category in self.json_categories is present in both per-category
    dicts, also when its count is 0. When there are no images (or no
    annotations) the related min and avg values are reported as 0 instead
    of raising ZeroDivisionError.
    """
    category_names = [category["name"] for category in self.json_categories]
    num_categories = len(category_names)
    num_images_per_category = dict.fromkeys(category_names, 0)
    num_annotations_per_category = dict.fromkeys(category_names, 0)

    num_images = len(self.images)
    num_annotations = 0
    # None means "nothing seen yet"; avoids a magic upper bound leaking into the result
    min_num_annotations_in_image = None
    max_num_annotations_in_image = 0
    total_annotation_area = 0
    min_annotation_area = None
    max_annotation_area = 0

    for image in self.images:
        # categories that occur at least once in this image
        categories_in_image = set()
        for annotation in image.annotations:
            category_name = annotation.category_name
            annotation_area = annotation.area
            total_annotation_area += annotation_area
            num_annotations_per_category[category_name] += 1
            categories_in_image.add(category_name)
            # update min&max annotation area
            if annotation_area > max_annotation_area:
                max_annotation_area = annotation_area
            if min_annotation_area is None or annotation_area < min_annotation_area:
                min_annotation_area = annotation_area

        # update num_images_per_category in place so zero-count categories are kept
        for category_name in categories_in_image:
            num_images_per_category[category_name] += 1

        # update num_annotations and min&max_num_annotations_in_image
        num_annotations_in_image = len(image.annotations)
        num_annotations += num_annotations_in_image
        if num_annotations_in_image > max_num_annotations_in_image:
            max_num_annotations_in_image = num_annotations_in_image
        if (
            min_num_annotations_in_image is None
            or num_annotations_in_image < min_num_annotations_in_image
        ):
            min_num_annotations_in_image = num_annotations_in_image

    # guard the averages against empty datasets / datasets with only negative samples
    avg_num_annotations_in_image = num_annotations / num_images if num_images else 0
    avg_annotation_area = total_annotation_area / num_annotations if num_annotations else 0
    if min_num_annotations_in_image is None:
        min_num_annotations_in_image = 0
    if min_annotation_area is None:
        min_annotation_area = 0

    self._stats = {
        "num_images": num_images,
        "num_annotations": num_annotations,
        "num_categories": num_categories,
        "num_images_per_category": num_images_per_category,
        "num_annotations_per_category": num_annotations_per_category,
        "min_num_annotations_in_image": min_num_annotations_in_image,
        "max_num_annotations_in_image": max_num_annotations_in_image,
        "avg_num_annotations_in_image": avg_num_annotations_in_image,
        "min_annotation_area": min_annotation_area,
        "max_annotation_area": max_annotation_area,
        "avg_annotation_area": avg_annotation_area,
    }

def split_coco_as_train_val(
self, train_split_rate=0.9, numpy_seed=0
):
Expand Down
26 changes: 25 additions & 1 deletion tests/test_cocoutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,18 @@ def test_coco(self):
coco2.category_mapping,
category_mapping,
)
self.assertEqual(
coco1.stats,
coco2.stats,
)
self.assertEqual(
coco1.stats["num_images"],
len(coco1.images),
)
self.assertEqual(
coco1.stats["num_annotations"],
len(coco1.json["annotations"]),
)

def test_split_coco_as_train_val(self):
from sahi.utils.coco import Coco
Expand All @@ -300,11 +312,15 @@ def test_split_coco_as_train_val(self):
self.assertEqual(len(result["train_coco"].json["annotations"]), 5)
self.assertEqual(result["train_coco"].json["images"][0]["height"], 682)
self.assertEqual(result["train_coco"].image_dir, image_dir)
self.assertEqual(result["train_coco"].stats["num_images"], len(result["train_coco"].images))
self.assertEqual(result["train_coco"].stats["num_annotations"], len(result["train_coco"].json["annotations"]))

self.assertEqual(len(result["val_coco"].json["images"]), 1)
self.assertEqual(len(result["val_coco"].json["annotations"]), 7)
self.assertEqual(result["val_coco"].json["images"][0]["height"], 1365)
self.assertEqual(result["val_coco"].image_dir, image_dir)
self.assertEqual(result["val_coco"].stats["num_images"], len(result["val_coco"].images))
self.assertEqual(result["val_coco"].stats["num_annotations"], len(result["val_coco"].json["annotations"]))

def test_coco2yolo(self):
from sahi.utils.coco import Coco
Expand Down Expand Up @@ -367,6 +383,8 @@ def test_coco_update_categories(self):
)
self.assertEqual(coco.json["annotations"][1]["category_id"], 1)
self.assertEqual(coco.image_dir, image_dir)
self.assertEqual(coco.stats["num_images"], len(coco.images))
self.assertEqual(coco.stats["num_annotations"], len(coco.json["annotations"]))

# update categories
desired_name2id = {"human": 1, "car": 2, "big_vehicle": 3}
Expand All @@ -385,6 +403,8 @@ def test_coco_update_categories(self):
)
self.assertEqual(coco.json["annotations"][1]["category_id"], 2)
self.assertEqual(coco.image_dir, image_dir)
self.assertEqual(coco.stats["num_images"], len(coco.images))
self.assertEqual(coco.stats["num_annotations"], len(coco.json["annotations"]))

def test_get_imageid2annotationlist_mapping(self):
from sahi.utils.coco import get_imageid2annotationlist_mapping
Expand All @@ -405,7 +425,7 @@ def check_image_id(image_id):
check_image_id(image_id=1)
check_image_id(image_id=2)

def test_get_imageid2annotationlist_mapping_mt(self):
def test_get_imageid2annotationlist_mapping_mp(self):
from sahi.utils.coco import get_imageid2annotationlist_mapping_mp

coco_path = "tests/data/coco_utils/combined_coco.json"
Expand Down Expand Up @@ -547,6 +567,8 @@ def test_coco_merge(self):
coco2.image_dir,
image_dir,
)
self.assertEqual(coco2.stats["num_images"], len(coco2.images))
self.assertEqual(coco2.stats["num_annotations"], len(coco2.json["annotations"]))

def test_get_subsampled_coco(self):
from sahi.utils.coco import Coco
Expand Down Expand Up @@ -580,6 +602,8 @@ def test_get_subsampled_coco(self):
subsampled_coco.image_dir,
image_dir,
)
self.assertEqual(subsampled_coco.stats["num_images"], len(subsampled_coco.images))
self.assertEqual(subsampled_coco.stats["num_annotations"], len(subsampled_coco.json["annotations"]))

def test_cocovid(self):
from sahi.utils.coco import CocoVid
Expand Down

0 comments on commit 571e7a2

Please sign in to comment.