Moondream Content Moderation (#3)

rotationalio · Nov 12, 2024 · c57ba7b · c57ba7b
1 parent aaf47a3
commit c57ba7b
Show file tree

Hide file tree

Showing 8 changed files with 117 additions and 2 deletions.
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
@@ -15,7 +15,7 @@ jobs:
         working-directory: ${{ github.workspace }}/llm-benchmark
     strategy:
       matrix:
-        python-version: ["3.11.x", "3.12.x", "3.13.x"]
+        python-version: ["3.11.x", "3.12.x"]
 
     steps:
       - name: Checkout Code

diff --git a/.gitignore b/.gitignore
@@ -160,3 +160,5 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+datasets
diff --git a/README.md b/README.md
@@ -39,6 +39,25 @@ Options:
   -h, --help                 Show this message and exit.
 ```
 
+## Moondream Benchmarks
+
+The [moondream](https://huggingface.co/vikhyatk/moondream2) package contains small image-to-text computer vision models that can be used as the first step of a [content moderation](https://www.cloudraft.io/blog/content-moderation-using-llamaindex-and-llm) workflow (e.g. convert the image to text, then moderate the text). This benchmark runs the model's _encoding_ and _inference_ steps on a small number of images and reports the average time for each operation as well as the line-by-line memory usage of the model.
+
+It can be run as follows:
+
+```
+$ construe moondream
+```
+
+Command usage is as follows:
+
+```
+Usage: construe moondream [OPTIONS]
+
+Options:
+  -h, --help  Show this message and exit.
+```
+
 ## Releases
 
 To release the construe library and deploy to PyPI run the following commands:

diff --git a/construe/__main__.py b/construe/__main__.py
@@ -6,6 +6,7 @@
 
 from .version import get_version
 from .basic import BasicBenchmark
+from .moondream import MoonDreamBenchmark
 
 
 CONTEXT_SETTINGS = {
@@ -57,6 +58,12 @@ def basic(**kwargs):
     benchmark.run()
 
 
+# CLI entry point for `construe moondream`: builds the benchmark and runs it.
+@main.command()
+def moondream(**kwargs):
+    benchmark = MoonDreamBenchmark(**kwargs)
+    benchmark.run()
+
+
 if __name__ == "__main__":
     main(
         prog_name="construe",

diff --git a/construe/datasets.py b/construe/datasets.py
@@ -0,0 +1,10 @@
+"""
+Manages datasets used for inferencing
+"""
+
+import glob
+
+
+def load_content_moderation():
+    """
+    Yield the path of every file in the content moderation dataset.
+
+    Files are discovered under ``datasets/content-moderation`` (relative to
+    the current working directory) at any depth and yielded in sorted order so
+    that repeated benchmark runs process the images in the same sequence.
+    Directories and hidden files are not yielded.
+    """
+    import os
+
+    pattern = "datasets/content-moderation/**/*"
+    # recursive=True is required for "**" to span directory levels; without
+    # it glob treats "**" like "*" and only matches a single nesting level.
+    for path in sorted(glob.glob(pattern, recursive=True)):
+        # A recursive match also returns the directories themselves, which
+        # cannot be opened as images, so only regular files are yielded.
+        if os.path.isfile(path):
+            yield path
diff --git a/construe/moondream.py b/construe/moondream.py
@@ -0,0 +1,65 @@
+"""
+Moondream is a computer vision model (image to text) that is optimized for use
+on embedded devices and serves as an example model in content moderation use
+cases where the image is captioned and then the caption is moderated.
+"""
+
+import time
+import tqdm
+import numpy as np
+
+from PIL import Image
+from memory_profiler import profile
+from construe.datasets import load_content_moderation
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+# Hugging Face model identifier loaded by MoonDreamProfiler.load_model
+MODEL_ID = "vikhyatk/moondream2"
+# Model revision passed to from_pretrained for both the model and the tokenizer
+REVISION = "2024-08-26"
+
+
+class MoonDreamBenchmark(object):
+    """
+    Runs the moondream model over the content moderation dataset and reports
+    the average image encoding time and the average inference time.
+    """
+
+    def __init__(self):
+        self.moondream = MoonDreamProfiler()
+        self.dataset = list(load_content_moderation())
+
+    @profile
+    def run(self):
+        """
+        Encode and run inference on every image in the dataset, then print the
+        mean time (in seconds, as measured by ``time.perf_counter``) of each
+        operation. The method is wrapped by ``memory_profiler.profile``.
+        """
+        # Without this guard an empty dataset averages two empty arrays, which
+        # emits a numpy RuntimeWarning and reports "nan" for both timings.
+        if not self.dataset:
+            print("No images found in the content moderation dataset.")
+            return
+
+        encode_times = []
+        inference_times = []
+        for path in tqdm.tqdm(self.dataset):
+            encoded, encode_time = self.moondream.encode_image(path)
+            encode_times.append(encode_time)
+            inference_times.append(self.moondream.inference(encoded))
+
+        encode_mean = np.mean(encode_times)
+        inference_mean = np.mean(inference_times)
+
+        print(f"Encode Average: {encode_mean:0.2f}")
+        print(f"Inference Average: {inference_mean:0.2f}")
+
+
+class MoonDreamProfiler(object):
+    """
+    Holds the moondream model and tokenizer and times individual encode and
+    inference calls against them.
+    """
+
+    def __init__(self):
+        self.load_model()
+
+    def load_model(self):
+        """
+        Load the model and tokenizer for MODEL_ID at the pinned REVISION and
+        store them on ``self.model`` and ``self.tokenizer``.
+        """
+        # trust_remote_code=True allows transformers to run the modeling code
+        # shipped in the model repository, so the revision is pinned.
+        self.model = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID, trust_remote_code=True, revision=REVISION,
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, revision=REVISION)
+
+    def encode_image(self, path):
+        """
+        Encode the image at ``path`` with the model.
+
+        Returns a tuple of ``(encoded, seconds)`` where ``seconds`` is the time
+        spent inside the model's ``encode_image`` call.
+        """
+        # Image.open keeps the underlying file open; the context manager
+        # releases the handle once the image has been encoded.
+        with Image.open(path) as image:
+            start = time.perf_counter()
+            encoded = self.model.encode_image(image)
+            delta = time.perf_counter() - start
+        return encoded, delta
+
+    def inference(self, image):
+        """
+        Ask the model to describe an already encoded ``image`` and return the
+        elapsed time in seconds; the generated answer itself is discarded.
+        """
+        start = time.perf_counter()
+        self.model.answer_question(
+            image, "Describe this image in detail with transparency.", self.tokenizer
+        )
+        return time.perf_counter() - start
diff --git a/datasets/.gitkeep b/datasets/.gitkeep
diff --git a/requirements.txt b/requirements.txt
@@ -1,9 +1,15 @@
 # Primary Dependencies
 click==8.1.7
+einops==0.8.0
+memory-profiler==0.61.0
 numpy==2.1.3
+pillow==11.0.0
 python-dotenv==1.0.1
 requests==2.32.3
 torch==2.5.1
+torchvision==0.20.1
+tqdm==4.67.0
+transformers==4.46.2
 
 # Packaging Dependencies
 # black==24.10.0
@@ -28,6 +34,7 @@ torch==2.5.1
 # docutils==0.21.2
 # filelock==3.16.1
 # fsspec==2024.10.0
+# huggingface-hub==0.26.2
 # idna==3.10
 # importlib_metadata==8.5.0
 # iniconfig==2.0.0
@@ -48,13 +55,18 @@ torch==2.5.1
 # pkginfo==1.10.0
 # platformdirs==4.3.6
 # pluggy==1.5.0
+# psutil==6.1.0
 # Pygments==2.18.0
 # pyproject_hooks==1.2.0
+# PyYAML==6.0.2
 # readme_renderer==44.0
+# regex==2024.11.6
 # requests-toolbelt==1.0.0
 # rfc3986==2.0.0
 # rich==13.9.4
+# safetensors==0.4.5
 # sympy==1.13.1
+# tokenizers==0.20.3
 # typing_extensions==4.12.2
 # urllib3==2.2.3
-# zipp==3.21.0
+# zipp==3.21.0