From c57ba7ba3b6fa4414b32e9e79ca5f45b76bef2e8 Mon Sep 17 00:00:00 2001
From: Benjamin Bengfort <benjamin@rotational.io>
Date: Tue, 12 Nov 2024 18:17:48 -0500
Subject: [PATCH] Moondream Content Moderation (#3)

---
 .github/workflows/tests.yaml |  2 +-
 .gitignore                   |  2 ++
 README.md                    | 19 +++++++++++
 construe/__main__.py         |  7 ++++
 construe/datasets.py         | 10 ++++++
 construe/moondream.py        | 65 ++++++++++++++++++++++++++++++++++++
 datasets/.gitkeep            |  0
 requirements.txt             | 14 +++++++-
 8 files changed, 117 insertions(+), 2 deletions(-)
 create mode 100644 construe/datasets.py
 create mode 100644 construe/moondream.py
 create mode 100644 datasets/.gitkeep

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index f5358fd..ff6f317 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -15,7 +15,7 @@ jobs:
         working-directory: ${{ github.workspace }}/llm-benchmark
     strategy:
       matrix:
-        python-version: ["3.11.x", "3.12.x", "3.13.x"]
+        python-version: ["3.11.x", "3.12.x"]
 
     steps:
       - name: Checkout Code
diff --git a/.gitignore b/.gitignore
index 1348066..98e217d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -160,3 +160,5 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+datasets
\ No newline at end of file
diff --git a/README.md b/README.md
index 8f79eba..3114bab 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,25 @@ Options:
   -h, --help                 Show this message and exit.
 ```
 
+## Moondream Benchmarks
+
+The [moondream](https://huggingface.co/vikhyatk/moondream2) model is a small image-to-text computer vision model that can be used as the first step of a [content moderation](https://www.cloudraft.io/blog/content-moderation-using-llamaindex-and-llm) workflow (i.e. caption the image, then moderate the caption). This benchmark runs the model's _encoding_ and _inference_ steps on a small number of images and reports the average time in seconds for each operation, along with the line-by-line memory usage of the benchmark run.
+
+It can be run as follows:
+
+```
+$ construe moondream
+```
+
+Command usage is as follows:
+
+```
+Usage: construe moondream [OPTIONS]
+
+Options:
+  -h, --help  Show this message and exit.
+```
+
 ## Releases
 
 To release the construe library and deploy to PyPI run the following commands:
diff --git a/construe/__main__.py b/construe/__main__.py
index 316d006..3ff7f75 100644
--- a/construe/__main__.py
+++ b/construe/__main__.py
@@ -6,6 +6,7 @@
 
 from .version import get_version
 from .basic import BasicBenchmark
+from .moondream import MoonDreamBenchmark
 
 
 CONTEXT_SETTINGS = {
@@ -57,6 +58,12 @@ def basic(**kwargs):
     benchmark.run()
 
 
+@main.command()
+def moondream(**kwargs):
+    # Forward any CLI options to the benchmark, then execute it right away.
+    MoonDreamBenchmark(**kwargs).run()
+
+
 if __name__ == "__main__":
     main(
         prog_name="construe",
diff --git a/construe/datasets.py b/construe/datasets.py
new file mode 100644
index 0000000..ba5278e
--- /dev/null
+++ b/construe/datasets.py
@@ -0,0 +1,10 @@
+"""
+Manages datasets used for inferencing
+"""
+
+import glob
+
+
+def load_content_moderation():
+    # NOTE: without recursive=True, "**" matches like "*" (one directory level).
+    yield from glob.glob("datasets/content-moderation/**/*")
diff --git a/construe/moondream.py b/construe/moondream.py
new file mode 100644
index 0000000..ec29c16
--- /dev/null
+++ b/construe/moondream.py
@@ -0,0 +1,65 @@
+"""
+Moondream is a computer vision model (image to text) that is optimized for use
+on embedded devices and serves as an example model in content moderation use
+cases where the image is captioned and then the caption is moderated.
+"""
+
+import time
+import tqdm
+import numpy as np
+
+from PIL import Image
+from memory_profiler import profile
+from construe.datasets import load_content_moderation
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+MODEL_ID = "vikhyatk/moondream2"  # model id passed to from_pretrained()
+REVISION = "2024-08-26"  # revision passed to from_pretrained() for model and tokenizer
+
+
+class MoonDreamBenchmark(object):
+    """Reports mean Moondream encode and inference times over the dataset."""
+
+    def __init__(self):
+        self.moondream = MoonDreamProfiler()
+        self.dataset = list(load_content_moderation())
+
+    @profile
+    def run(self):
+        if not self.dataset:  # the mean of no timings is nan plus a RuntimeWarning
+            print("No content moderation images found; nothing to benchmark.")
+            return
+        encode_times, inference_times = [], []
+        for path in tqdm.tqdm(self.dataset):
+            encoded, encode_time = self.moondream.encode_image(path)
+            encode_times.append(encode_time)
+            inference_times.append(self.moondream.inference(encoded))
+        print(f"Encode Average: {np.mean(encode_times):0.2f}")
+        print(f"Inference Average: {np.mean(inference_times):0.2f}")
+
+
+class MoonDreamProfiler(object):
+    """Loads Moondream once and times single encode and question-answer calls."""
+
+    def __init__(self):
+        self.load_model()
+
+    def load_model(self):
+        self.model = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID, trust_remote_code=True, revision=REVISION,
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, revision=REVISION)
+
+    def encode_image(self, path):
+        with Image.open(path) as image:  # always release the file handle
+            start = time.perf_counter()
+            encoded = self.model.encode_image(image)
+            return encoded, time.perf_counter() - start
+
+    def inference(self, image):
+        start = time.perf_counter()
+        self.model.answer_question(
+            image, "Describe this image in detail with transparency.", self.tokenizer
+        )
+        return time.perf_counter() - start
diff --git a/datasets/.gitkeep b/datasets/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/requirements.txt b/requirements.txt
index 2ca7404..3128edc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,15 @@
 # Primary Dependencies
 click==8.1.7
+einops==0.8.0
+memory-profiler==0.61.0
 numpy==2.1.3
+pillow==11.0.0
 python-dotenv==1.0.1
 requests==2.32.3
 torch==2.5.1
+torchvision==0.20.1
+tqdm==4.67.0
+transformers==4.46.2
 
 # Packaging Dependencies
 # black==24.10.0
@@ -28,6 +34,7 @@ torch==2.5.1
 # docutils==0.21.2
 # filelock==3.16.1
 # fsspec==2024.10.0
+# huggingface-hub==0.26.2
 # idna==3.10
 # importlib_metadata==8.5.0
 # iniconfig==2.0.0
@@ -48,13 +55,18 @@ torch==2.5.1
 # pkginfo==1.10.0
 # platformdirs==4.3.6
 # pluggy==1.5.0
+# psutil==6.1.0
 # Pygments==2.18.0
 # pyproject_hooks==1.2.0
+# PyYAML==6.0.2
 # readme_renderer==44.0
+# regex==2024.11.6
 # requests-toolbelt==1.0.0
 # rfc3986==2.0.0
 # rich==13.9.4
+# safetensors==0.4.5
 # sympy==1.13.1
+# tokenizers==0.20.3
 # typing_extensions==4.12.2
 # urllib3==2.2.3
-# zipp==3.21.0
+# zipp==3.21.0
\ No newline at end of file