Merge branch 'main' into share-magic-wormhole

OpenAdaptAI · Jun 23, 2023 · 0977859 · 0977859
2 parents 53f11f5 + fee2c72
commit 0977859
Show file tree

Hide file tree

Showing 5 changed files with 233 additions and 5 deletions.
diff --git a/openadapt/scripts/scrub.py b/openadapt/scripts/scrub.py
@@ -0,0 +1,133 @@
+"""Module for scrubbing a media file.
+
+Usage:
+    $ python -m openadapt.scripts.scrub scrub_mp4 <mp4_file_path> \
+        <scrub_all_entities> <playback_speed_multiplier> <crop_start_time> \
+        <crop_end_time>
+
+Parameters:
+        mp4_file_path: Path to the mp4 file (str)
+        scrub_all_entities: True/False
+        playback_speed_multiplier: (float/int)
+        crop_start_time: (int) [in seconds]
+        crop_end_time: (int) [in seconds]
+
+    Only the mp4 file path is required at command line; the remaining
+    arguments fall back to the defaults of scrub_mp4 when omitted.
+
+Example: To redact all entities in sample2.mp4
+         from the 2nd second to the 16th second and play it at 2x speed:
+    $ python -m openadapt.scripts.scrub scrub_mp4 sample2.mp4 True 2 2 16
+"""
+
+from typing import Optional
+import math
+
+from tqdm import tqdm
+from PIL import Image
+from moviepy.editor import VideoFileClip, VideoClip
+from moviepy.video.fx import speedx
+import fire
+import numpy as np
+
+from openadapt import config, scrub, utils
+
+
+def _make_frame(time, final, progress_bar, progress_threshold=None):
+    """
+    Private function to scrub a frame.
+
+    Args:
+        time: Time (in seconds) of the frame to read from ``final``.
+        final: Video clip the frame is read from (via ``get_frame``).
+        progress_bar: Progress bar; advanced by one for each frame produced.
+        progress_threshold: Unused. Still accepted so that callers which
+            pass it positionally keep working.
+
+    Returns:
+        The redacted frame as a numpy array.
+    """
+
+    frame = final.get_frame(time)
+
+    image = Image.fromarray(frame)
+
+    redacted_image = scrub.scrub_image(image)  # Redaction
+
+    # Convert the redacted PIL image back to a numpy array for the caller
+    redacted_frame = np.array(redacted_image)
+
+    # The former threshold bookkeeping only rebound a local variable and had
+    # no observable effect, so the progress bar update is all that is needed.
+    progress_bar.update(1)
+
+    return redacted_frame
+
+
+def scrub_mp4(
+    mp4_file: str,
+    scrub_all_entities: bool = False,
+    playback_speed_multiplier: float = 1.0,
+    crop_start_time: int = 0,
+    crop_end_time: Optional[int] = None,
+) -> str:
+    """
+    Scrub a mp4 file.
+
+    The clip is cropped to [crop_start_time, crop_end_time], sped up by
+    playback_speed_multiplier, redacted frame by frame and written next to
+    the input file with a "_scrubbed.mp4" suffix.
+
+    Args:
+        mp4_file: Path to the mp4 file.
+        scrub_all_entities: True/False. If true, scrubs all entities
+        playback_speed_multiplier: Multiplier for playback speed. (float/int)
+        crop_start_time: Start Time (in seconds)
+        crop_end_time: End Time (in seconds). None keeps the clip up to
+            its end.
+
+    Returns:
+        A status message that ends with the path to the scrubbed
+        (redacted) mp4 file.
+    """
+    import os  # local import: only needed to derive the output path
+
+    if scrub_all_entities:
+        # NOTE(review): this overwrites a module-level setting and is not
+        # restored afterwards; confirm the scrubber reads it at call time.
+        config.SCRUB_IGNORE_ENTITIES = []
+
+    # Strip the real extension instead of assuming it is 4 characters long
+    file_root, _ = os.path.splitext(mp4_file)
+    scrubbed_file = file_root + "_scrubbed.mp4"
+
+    mp4_clip = VideoFileClip(mp4_file)
+    progress_bar = None
+    try:
+        cropped_clip = mp4_clip.subclip(crop_start_time, crop_end_time)
+        final = cropped_clip.fx(VideoClip.speedx, playback_speed_multiplier)
+
+        # Prepare progress bar
+        frame_count = round(final.duration * final.fps)
+        progress_bar_format = (
+            "{desc}: {percentage:.0f}% "
+            "| {bar} | "
+            "{n_fmt}/{total_fmt} | {rate_fmt} | [{elapsed}<{remaining}] |"
+        )
+        progress_bar = tqdm(
+            total=frame_count,
+            desc="Processing",
+            unit="frame",
+            bar_format=progress_bar_format,
+            colour="green",
+        )
+        progress_interval = 0.1  # 10% of frames
+        progress_threshold = math.floor(frame_count * progress_interval)
+
+        redacted_clip = VideoClip(
+            make_frame=lambda t: _make_frame(
+                t,
+                final,
+                progress_bar,
+                progress_threshold,
+            ),
+            duration=final.duration,
+        )  # Redact the clip
+
+        redacted_clip.write_videofile(
+            scrubbed_file, fps=final.fps, logger=None
+        )  # Write the redacted clip to a file
+    finally:
+        # Release the progress bar and the source clip even if
+        # reading, redaction or writing fails part-way through.
+        if progress_bar is not None:
+            progress_bar.close()
+        mp4_clip.close()
+
+    return "Scrubbed File Saved at: " + scrubbed_file
+
+
+if __name__ == "__main__":
+    # Command-line entry point: hands the result of utils.get_functions for
+    # this module to fire, so that e.g. `scrub_mp4` can be invoked as a
+    # sub-command (presumably get_functions collects this module's functions
+    # by name; confirm against openadapt.utils).
+    fire.Fire(utils.get_functions(__name__))
diff --git a/openadapt/scrub.py b/openadapt/scrub.py
@@ -45,6 +45,7 @@ def scrub_text(text: str, is_separated: bool = False) -> str:
     Returns:
         str: Scrubbed text
     """
+
     if text is None:
         return None
 
@@ -100,6 +101,7 @@ def scrub_image(
     Returns:
         PIL.Image: The scrubbed image with PII and PHI removed.
     """
+
     redacted_image = IMAGE_REDACTOR.redact(
         image, fill=fill_color, entities=SCRUBBING_ENTITIES
     )
@@ -159,6 +161,7 @@ def _scrub_text_item(
     Returns:
         str: The scrubbed value
     """
+
     if key in ("text", "canonical_text"):
         return scrub_text(value, is_separated=True)
     if force_scrub_children:
@@ -203,6 +206,7 @@ def _scrub_list_item(
     Returns:
         dict/str: The scrubbed dict/value respectively
     """
+
     if isinstance(item, dict):
         return scrub_dict(
             item, list_keys, force_scrub_children=force_scrub_children
@@ -225,6 +229,7 @@ def scrub_dict(
     Returns:
         dict: The scrubbed dict with PII and PHI removed.
     """
+
     if list_keys is None:
         list_keys = config.SCRUB_KEYS_HTML
 
@@ -270,6 +275,7 @@ def scrub_list_dicts(input_list: List[Dict], list_keys: List = None) -> List[Dic
     Returns:
         list[dict]: The scrubbed list of dicts with PII and PHI removed.
     """
+
     scrubbed_list_dicts = []
     for input_dict in input_list:
         scrubbed_list_dicts.append(scrub_dict(input_dict, list_keys))

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -55,6 +55,7 @@ torchvision = "^0.15.2"
 sumy = "0.11.0"
 nltk = "3.8.1"
 pywinauto = {version = "^0.6.8", markers = "sys_platform == 'win32'"}
+moviepy = "1.0.3"
 python-levenshtein = "^0.21.1"
 magic-wormhole = "0.12.0"
 

diff --git a/requirements.txt b/requirements.txt
@@ -11,6 +11,7 @@ fuzzywuzzy==0.18.0
 ipdb==0.13.11
 loguru==0.6.0
 matplotlib==3.6.2
+moviepy==1.0.3
 mss==6.1.0
 nltk==3.8.1
 openai==0.27.5
@@ -38,4 +39,4 @@ magic-wormhole==0.12.0
 nicegui==1.2.16
 transformers==4.29.2
 python-dotenv==1.0.0
-python-Levenshtein==0.21.1
+python-Levenshtein==0.21.1