Skip to content

Commit

Permalink
Adding aggregation feature (#2)
Browse files Browse the repository at this point in the history
  • Loading branch information
Femme Phile committed Oct 17, 2023
1 parent 0f96d50 commit 43584bc
Showing 1 changed file with 158 additions and 27 deletions.
185 changes: 158 additions & 27 deletions src/cleanvision/videolab.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
"""Videolab is an extension of Imagelab for finding issues in a video dataset."""
from pathlib import Path
from typing import Any
from typing import Dict
from typing import Generator
from typing import List
from typing import Optional
from typing import Any, Dict, Generator, List, Optional

Check warning on line 3 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L2-L3

Added lines #L2 - L3 were not covered by tests

import av
from cleanvision.imagelab import Imagelab
import pandas as pd
from PIL.Image import Image

Check warning on line 7 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L5-L7

Added lines #L5 - L7 were not covered by tests

from cleanvision.imagelab import Imagelab
from cleanvision.utils.utils import get_is_issue_colname

Check warning on line 10 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L9-L10

Added lines #L9 - L10 were not covered by tests

VIDEO_FILE_EXTENSIONS = ["*.mp4", "*.avi", "*.mkv", "*.mov", "*.webm"]

Check warning on line 12 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L12

Added line #L12 was not covered by tests

Expand Down Expand Up @@ -54,16 +52,16 @@ def sample(self, video_file: Path, output_dir: Path) -> None:
frame_pil.save(sample_sub_dir / image_file_name)

Check warning on line 52 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L52

Added line #L52 was not covered by tests


class Videolab:
class Videolab(Imagelab):

Check warning on line 55 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L55

Added line #L55 was not covered by tests
"""A single class to find all types of issues in video datasets."""

def __init__(

Check warning on line 58 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L58

Added line #L58 was not covered by tests
self,
video_dir: str,
) -> None:
"""Create Path object from video directory string."""
# store video directory path
self.video_dir: Path = Path(video_dir)

Check warning on line 64 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L64

Added line #L64 was not covered by tests
self.imagelab: Optional[Imagelab] = None

def _find_videos(self) -> Generator[Path, None, None]:

Check warning on line 66 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L66

Added line #L66 was not covered by tests
"""Iterate over video files in video directory."""
Expand All @@ -82,6 +80,85 @@ def _sample_frames(self, samples_dir: Path, sample_interval: int) -> None:
# sample frames from target video data directory
frame_sampler.sample(video_file, samples_dir)

Check warning on line 81 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L81

Added line #L81 was not covered by tests

def _parent_dir_frame_samples_dict(self) -> Dict[str, List[str]]:

Check warning on line 83 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L83

Added line #L83 was not covered by tests
"""Creates dictionary of parent directory and frame samples."""
# set dict
cluster_frame_samples: Dict[str, List[str]] = {}

Check warning on line 86 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L86

Added line #L86 was not covered by tests

# looper over index
for img_path in self.issues.index:
# get frame sample parent
sample_dir = Path(img_path).parents[0]

Check warning on line 91 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L91

Added line #L91 was not covered by tests

# get key
key = str(sample_dir)

Check warning on line 94 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L94

Added line #L94 was not covered by tests

# check if key exists
if key in cluster_frame_samples:
# update
cluster_frame_samples[key].append(img_path)

Check warning on line 99 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L99

Added line #L99 was not covered by tests

else:
# create new entry
cluster_frame_samples[key] = [img_path]

Check warning on line 103 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L103

Added line #L103 was not covered by tests

# get cluster dict
return cluster_frame_samples

Check warning on line 106 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L106

Added line #L106 was not covered by tests

def _aggregate_issues(self) -> pd.DataFrame:
    """Collapse per-frame issue rows into one row per parent directory.

    Frames are grouped with ``_parent_dir_frame_samples_dict``. For each
    group every column of ``self.issues`` is averaged, and the resulting
    row is labelled with the path of the group's middle frame.

    Returns
    -------
    pd.DataFrame
        One row per group. The ``is_<issue>_issue`` style columns (as
        named by ``get_is_issue_colname``) are cast back to ``bool``;
        the remaining columns hold the per-group means.
    """
    # multiplying by 1 turns boolean columns into numbers so they can be averaged
    numeric_issues = self.issues * 1

    per_group_rows = []
    for frame_paths in self._parent_dir_frame_samples_dict().values():
        group_frames = numeric_issues.loc[frame_paths]

        # the middle frame of the group serves as the row label
        middle_frame = frame_paths[len(frame_paths) // 2]

        column_means = group_frames.mean().to_dict()
        per_group_rows.append(pd.DataFrame(column_means, index=[middle_frame]))

    combined = pd.concat(per_group_rows)

    flag_columns = [get_is_issue_colname(name) for name in self._issue_types]

    # astype(bool) maps any non-zero mean to True, so a group is flagged
    # as soon as one of its frames was flagged
    combined[flag_columns] = combined[flag_columns].astype(bool)

    return combined

Check warning on line 139 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L139

Added line #L139 was not covered by tests

def _aggregate_summary(self) -> pd.DataFrame:

Check warning on line 141 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L141

Added line #L141 was not covered by tests
"""Create issues summary for aggregate issues."""
# setup issue summary storage
summary_dict = {}

Check warning on line 144 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L144

Added line #L144 was not covered by tests

# loop over issue type
for issue_type in self._issue_types:
# add individual type summaries
summary_dict[issue_type] = {

Check warning on line 149 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L149

Added line #L149 was not covered by tests
"num_images": self.agg_issues[get_is_issue_colname(issue_type)].sum()
}

# reshape summary dataframe
agg_summary = pd.DataFrame.from_dict(summary_dict, orient="index")
agg_summary = agg_summary.reset_index()
agg_summary = agg_summary.rename(columns={"index": "issue_type"})
agg_summary = agg_summary.astype({"num_images": int, "issue_type": str})

Check warning on line 157 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L154-L157

Added lines #L154 - L157 were not covered by tests

# return aggregate summary
return agg_summary

Check warning on line 160 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L160

Added line #L160 was not covered by tests

def find_issues(

Check warning on line 162 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L162

Added line #L162 was not covered by tests
self,
frame_samples_dir: str,
Expand All @@ -94,13 +171,17 @@ def find_issues(
# create sample frames
self._sample_frames(Path(frame_samples_dir), frame_samples_interval)

Check warning on line 172 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L172

Added line #L172 was not covered by tests

# create Imagelab instance
self.imagelab = Imagelab(frame_samples_dir)
# call parent constructor
super().__init__(frame_samples_dir)

Check warning on line 175 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L175

Added line #L175 was not covered by tests

# call Imagelab to find issues in sampled frames
self.imagelab.find_issues(issue_types, n_jobs, verbose)
# call parent find_issues on sampled frames
super().find_issues(issue_types, n_jobs, verbose)

Check warning on line 178 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L178

Added line #L178 was not covered by tests

def report(
# update aggregate issues/summary
self.agg_issues = self._aggregate_issues()
self.agg_summary = self._aggregate_summary()

Check warning on line 182 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L181-L182

Added lines #L181 - L182 were not covered by tests

def _aggregate_report(

Check warning on line 184 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L184

Added line #L184 was not covered by tests
self,
issue_types: Optional[List[str]] = None,
max_prevalence: Optional[float] = None,
Expand All @@ -109,21 +190,71 @@ def report(
print_summary: bool = True,
show_id: bool = False,
) -> None:
"""Prints summary of the issues found in your dataset."""
# check if imagelab instance exists
if self.imagelab is None:
"""Create report visualization for aggregate issues."""
assert isinstance(verbosity, int) and 0 <= verbosity < 5

user_supplied_args = locals()
report_args = self._get_report_args(user_supplied_args)

Check warning on line 197 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L196-L197

Added lines #L196 - L197 were not covered by tests

issue_types_to_report = (

Check warning on line 199 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L199

Added line #L199 was not covered by tests
issue_types if issue_types else self.agg_summary["issue_type"].tolist()
)

# filter issues based on max_prevalence in the dataset
filtered_issue_types = self._filter_report(

Check warning on line 204 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L204

Added line #L204 was not covered by tests
issue_types_to_report, report_args["max_prevalence"]
)

issue_summary = self.agg_summary[

Check warning on line 208 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L208

Added line #L208 was not covered by tests
self.agg_summary["issue_type"].isin(filtered_issue_types)
]
if len(issue_summary) > 0:
if verbosity:

Check warning on line 212 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L212

Added line #L212 was not covered by tests
print("Issues found in videos in order of severity in the dataset\n")
if print_summary:
self._pprint_issue_summary(issue_summary)

Check warning on line 215 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L215

Added line #L215 was not covered by tests
for issue_type in filtered_issue_types:
if (
self.agg_summary.query(f"issue_type == {issue_type!r}")[
"num_images"
].values[0]
== 0
):
continue
print(f"{' ' + issue_type + ' frames ':-^60}\n")
print(

Check warning on line 225 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L223-L225

Added lines #L223 - L225 were not covered by tests
f"Number of examples with this issue: "
f"{self.agg_issues[get_is_issue_colname(issue_type)].sum()}\n"
f"Examples representing most severe instances of this issue:\n"
)
self._visualize(

Check warning on line 230 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L230

Added line #L230 was not covered by tests
issue_type,
report_args["num_images"],
report_args["cell_size"],
show_id,
)
else:
print(

Check warning on line 237 in src/cleanvision/videolab.py

View check run for this annotation

Codecov / codecov/patch

src/cleanvision/videolab.py#L237

Added line #L237 was not covered by tests
"Please specify some issue_types to"
"Please specify some issue_types to "
"check for in videolab.find_issues()."
)

else:
# report on video frame samples
self.imagelab.report(
issue_types,
max_prevalence,
num_images,
verbosity,
print_summary,
show_id,
)
def report(
    self,
    issue_types: Optional[List[str]] = None,
    max_prevalence: Optional[float] = None,
    num_images: Optional[int] = None,
    verbosity: int = 1,
    print_summary: bool = True,
    show_id: bool = False,
) -> None:
    """Print a summary of the issues found in the dataset.

    This is a thin public entry point: every argument is handed on
    unchanged to ``_aggregate_report``, which produces the output.
    """
    # forward by keyword so each value is visibly tied to its parameter
    self._aggregate_report(
        issue_types=issue_types,
        max_prevalence=max_prevalence,
        num_images=num_images,
        verbosity=verbosity,
        print_summary=print_summary,
        show_id=show_id,
    )

0 comments on commit 43584bc

Please sign in to comment.