Skip to content

Commit

Permalink
Merge branch 'main' into share-magic-wormhole
Browse files Browse the repository at this point in the history
  • Loading branch information
Mustaballer authored Jun 23, 2023
2 parents 53f11f5 + fee2c72 commit 0977859
Show file tree
Hide file tree
Showing 5 changed files with 233 additions and 5 deletions.
133 changes: 133 additions & 0 deletions openadapt/scripts/scrub.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
"""Module for scrubbing a media file.
Usage:
$ python -m openadapt.scripts.scrub scrub_mp4 <mp4_file_path> \
<scrub_all_entities> <playback_speed_multiplier> <crop_start_time> \
<crop_end_time>
Parameters:
mp4_file_path: Path to the mp4 file (str)
scrub_all_entities: True/False
playback_speed_multiplier: (float/int)
crop_start_time: (int) [in seconds]
crop_end_time: (int) [in seconds]
Only mp4_file_path is required; the other arguments are optional and fall back to the defaults declared by scrub_mp4.
Example: To redact all entities in sample2.mp4
from the 2nd second to the 16th second and play it at 2x speed:
$ python -m openadapt.scripts.scrub scrub_mp4 sample2.mp4 True 2 2 16
"""

from typing import Optional
import math
import os

from tqdm import tqdm
from PIL import Image
from moviepy.editor import VideoFileClip, VideoClip
from moviepy.video.fx import speedx
import fire
import numpy as np

from openadapt import config, scrub, utils


def _make_frame(time, final, progress_bar, progress_threshold):
    """Fetch one frame from a clip, redact it and report progress.

    Args:
        time: Timestamp (in seconds) of the frame to read from ``final``.
        final: Clip to read from; only its ``get_frame(time)`` is used.
        progress_bar: Progress bar advanced by one step per processed frame.
        progress_threshold: Unused. Kept so existing callers that pass four
            arguments keep working; the previous implementation only
            rebound this local name, which had no observable effect.

    Returns:
        The redacted frame as a numpy array.
    """
    frame = final.get_frame(time)

    # scrub_image operates on PIL images, so wrap the raw frame array first.
    image = Image.fromarray(frame)
    redacted_image = scrub.scrub_image(image)

    # Convert the redacted PIL image back into an array for the video writer.
    redacted_frame = np.array(redacted_image)

    progress_bar.update(1)

    return redacted_frame


def scrub_mp4(
    mp4_file: str,
    scrub_all_entities: bool = False,
    playback_speed_multiplier: float = 1.0,
    crop_start_time: int = 0,
    crop_end_time: Optional[int] = None,
) -> str:
    """Crop, re-time and redact an mp4 file, writing the result next to it.

    The output is written to ``<input name without extension>_scrubbed.mp4``.

    Args:
        mp4_file: Path to the mp4 file.
        scrub_all_entities: If true, ``config.SCRUB_IGNORE_ENTITIES`` is reset
            to an empty list before scrubbing.
        playback_speed_multiplier: Multiplier for playback speed (float/int).
        crop_start_time: Start time of the kept section (in seconds).
        crop_end_time: End time of the kept section (in seconds). ``None`` is
            passed through to ``subclip`` unchanged.

    Returns:
        A status message of the form
        ``"Scrubbed File Saved at: <path to the scrubbed file>"``.
    """
    if scrub_all_entities:
        # NOTE(review): this only affects redaction if openadapt.scrub reads
        # the setting after this assignment - confirm against that module.
        config.SCRUB_IGNORE_ENTITIES = []

    # Strip the real extension instead of assuming it is four characters long.
    output_root, _ = os.path.splitext(mp4_file)
    scrubbed_file = output_root + "_scrubbed.mp4"

    mp4_clip = VideoFileClip(mp4_file)
    progress_bar = None
    try:
        cropped_clip = mp4_clip.subclip(crop_start_time, crop_end_time)
        final = cropped_clip.fx(VideoClip.speedx, playback_speed_multiplier)

        # Prepare progress bar
        frame_count = round(final.duration * final.fps)
        progress_bar_format = (
            "{desc}: {percentage:.0f}% "
            "| {bar} | "
            "{n_fmt}/{total_fmt} | {rate_fmt} | [{elapsed}<{remaining}] |"
        )
        progress_bar = tqdm(
            total=frame_count,
            desc="Processing",
            unit="frame",
            bar_format=progress_bar_format,
            colour="green",
        )
        progress_interval = 0.1  # 10% of the frames
        progress_threshold = math.floor(frame_count * progress_interval)

        # Every frame requested by the writer is redacted on the fly.
        redacted_clip = VideoClip(
            make_frame=lambda t: _make_frame(
                t,
                final,
                progress_bar,
                progress_threshold,
            ),
            duration=final.duration,
        )

        redacted_clip.write_videofile(
            scrubbed_file, fps=final.fps, logger=None
        )
    finally:
        # Release the progress bar and the source clip even when
        # cropping or encoding fails part-way through.
        if progress_bar is not None:
            progress_bar.close()
        mp4_clip.close()

    return "Scrubbed File Saved at: " + scrubbed_file


if __name__ == "__main__":
    # Hand whatever utils.get_functions returns for this module to Fire,
    # which turns it into the command-line interface.
    cli_commands = utils.get_functions(__name__)
    fire.Fire(cli_commands)
6 changes: 6 additions & 0 deletions openadapt/scrub.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def scrub_text(text: str, is_separated: bool = False) -> str:
Returns:
str: Scrubbed text
"""

if text is None:
return None

Expand Down Expand Up @@ -100,6 +101,7 @@ def scrub_image(
Returns:
PIL.Image: The scrubbed image with PII and PHI removed.
"""

redacted_image = IMAGE_REDACTOR.redact(
image, fill=fill_color, entities=SCRUBBING_ENTITIES
)
Expand Down Expand Up @@ -159,6 +161,7 @@ def _scrub_text_item(
Returns:
str: The scrubbed value
"""

if key in ("text", "canonical_text"):
return scrub_text(value, is_separated=True)
if force_scrub_children:
Expand Down Expand Up @@ -203,6 +206,7 @@ def _scrub_list_item(
Returns:
dict/str: The scrubbed dict/value respectively
"""

if isinstance(item, dict):
return scrub_dict(
item, list_keys, force_scrub_children=force_scrub_children
Expand All @@ -225,6 +229,7 @@ def scrub_dict(
Returns:
dict: The scrubbed dict with PII and PHI removed.
"""

if list_keys is None:
list_keys = config.SCRUB_KEYS_HTML

Expand Down Expand Up @@ -270,6 +275,7 @@ def scrub_list_dicts(input_list: List[Dict], list_keys: List = None) -> List[Dic
Returns:
list[dict]: The scrubbed list of dicts with PII and PHI removed.
"""

scrubbed_list_dicts = []
for input_dict in input_list:
scrubbed_list_dicts.append(scrub_dict(input_dict, list_keys))
Expand Down
95 changes: 91 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ torchvision = "^0.15.2"
sumy = "0.11.0"
nltk = "3.8.1"
pywinauto = {version = "^0.6.8", markers = "sys_platform == 'win32'"}
moviepy = "1.0.3"
python-levenshtein = "^0.21.1"
magic-wormhole = "0.12.0"

Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ fuzzywuzzy==0.18.0
ipdb==0.13.11
loguru==0.6.0
matplotlib==3.6.2
moviepy==1.0.3
mss==6.1.0
nltk==3.8.1
openai==0.27.5
Expand Down Expand Up @@ -38,4 +39,4 @@ magic-wormhole==0.12.0
nicegui==1.2.16
transformers==4.29.2
python-dotenv==1.0.0
python-Levenshtein==0.21.1
python-Levenshtein==0.21.1

0 comments on commit 0977859

Please sign in to comment.