From c57ba7ba3b6fa4414b32e9e79ca5f45b76bef2e8 Mon Sep 17 00:00:00 2001 From: Benjamin Bengfort Date: Tue, 12 Nov 2024 18:17:48 -0500 Subject: [PATCH] Moondream Content Moderation (#3) --- .github/workflows/tests.yaml | 2 +- .gitignore | 2 ++ README.md | 19 +++++++++++ construe/__main__.py | 7 ++++ construe/datasets.py | 10 ++++++ construe/moondream.py | 65 ++++++++++++++++++++++++++++++++++++ datasets/.gitkeep | 0 requirements.txt | 14 +++++++- 8 files changed, 117 insertions(+), 2 deletions(-) create mode 100644 construe/datasets.py create mode 100644 construe/moondream.py create mode 100644 datasets/.gitkeep diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index f5358fd..ff6f317 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -15,7 +15,7 @@ jobs: working-directory: ${{ github.workspace }}/llm-benchmark strategy: matrix: - python-version: ["3.11.x", "3.12.x", "3.13.x"] + python-version: ["3.11.x", "3.12.x"] steps: - name: Checkout Code diff --git a/.gitignore b/.gitignore index 1348066..98e217d 100644 --- a/.gitignore +++ b/.gitignore @@ -160,3 +160,5 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +datasets \ No newline at end of file diff --git a/README.md b/README.md index 8f79eba..3114bab 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,25 @@ Options: -h, --help Show this message and exit. ``` +## Moondream Benchmarks + +The [moondream](https://huggingface.co/vikhyatk/moondream2) package contains small image-to-text computer vision models that can be used in the first step of a [content moderation](https://www.cloudraft.io/blog/content-moderation-using-llamaindex-and-llm) workflow (e.g. convert the image to text, then moderate the text). 
This benchmark runs the model's image _encoding_ and _inference_ steps on a small number of images, then reports the average time of each operation along with the line-by-line memory usage of the benchmark run. + +It can be run as follows: + +``` +$ construe moondream +``` + +Command usage is as follows: + +``` +Usage: construe moondream [OPTIONS] + +Options: + -h, --help Show this message and exit. +``` + ## Releases To release the construe library and deploy to PyPI run the following commands: diff --git a/construe/__main__.py b/construe/__main__.py index 316d006..3ff7f75 100644 --- a/construe/__main__.py +++ b/construe/__main__.py @@ -6,6 +6,7 @@ from .version import get_version from .basic import BasicBenchmark +from .moondream import MoonDreamBenchmark CONTEXT_SETTINGS = { @@ -57,6 +58,12 @@ def basic(**kwargs): benchmark.run() +@main.command() +def moondream(**kwargs): + benchmark = MoonDreamBenchmark(**kwargs) + benchmark.run() + + if __name__ == "__main__": main( prog_name="construe", diff --git a/construe/datasets.py b/construe/datasets.py new file mode 100644 index 0000000..ba5278e --- /dev/null +++ b/construe/datasets.py @@ -0,0 +1,10 @@ +""" +Manages datasets used for inferencing +""" + +import glob + + +def load_content_moderation(): + for path in glob.glob("datasets/content-moderation/**/*"): + yield path diff --git a/construe/moondream.py b/construe/moondream.py new file mode 100644 index 0000000..ec29c16 --- /dev/null +++ b/construe/moondream.py @@ -0,0 +1,65 @@ +""" +Moondream is a computer vision model (image to text) that is optimized for use +on embedded devices and serves as an example model in content moderation use +cases where the image is captioned and then the caption is moderated. 
+""" + +import time +import tqdm +import numpy as np + +from PIL import Image +from memory_profiler import profile +from construe.datasets import load_content_moderation +from transformers import AutoModelForCausalLM, AutoTokenizer + + +MODEL_ID = "vikhyatk/moondream2" +REVISION = "2024-08-26" + + +class MoonDreamBenchmark(object): + + def __init__(self): + self.moondream = MoonDreamProfiler() + self.dataset = list(load_content_moderation()) + + @profile + def run(self): + results = [] + for path in tqdm.tqdm(self.dataset): + encoded, encode_time = self.moondream.encode_image(path) + inference_time = self.moondream.inference(encoded) + results.append((encode_time, inference_time)) + + encode_mean = np.array([result[0] for result in results]).mean() + inference_mean = np.array([result[1] for result in results]).mean() + + print(f"Encode Average: {encode_mean:0.2f}") + print(f"Inference Average: {inference_mean:0.2f}") + + +class MoonDreamProfiler(object): + + def __init__(self): + self.load_model() + + def load_model(self): + self.model = AutoModelForCausalLM.from_pretrained( + MODEL_ID, trust_remote_code=True, revision=REVISION, + ) + self.tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, revision=REVISION) + + def encode_image(self, path): + image = Image.open(path) + start = time.perf_counter() + encoded = self.model.encode_image(image) + delta = time.perf_counter() - start + return encoded, delta + + def inference(self, image): + start = time.perf_counter() + self.model.answer_question( + image, "Describe this image in detail with transparency.", self.tokenizer + ) + return time.perf_counter() - start diff --git a/datasets/.gitkeep b/datasets/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/requirements.txt b/requirements.txt index 2ca7404..3128edc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,15 @@ # Primary Dependencies click==8.1.7 +einops==0.8.0 +memory-profiler==0.61.0 numpy==2.1.3 +pillow==11.0.0 python-dotenv==1.0.1 
requests==2.32.3 torch==2.5.1 +torchvision==0.20.1 +tqdm==4.67.0 +transformers==4.46.2 # Packaging Dependencies # black==24.10.0 @@ -28,6 +34,7 @@ torch==2.5.1 # docutils==0.21.2 # filelock==3.16.1 # fsspec==2024.10.0 +# huggingface-hub==0.26.2 # idna==3.10 # importlib_metadata==8.5.0 # iniconfig==2.0.0 @@ -48,13 +55,18 @@ torch==2.5.1 # pkginfo==1.10.0 # platformdirs==4.3.6 # pluggy==1.5.0 +# psutil==6.1.0 # Pygments==2.18.0 # pyproject_hooks==1.2.0 +# PyYAML==6.0.2 # readme_renderer==44.0 +# regex==2024.11.6 # requests-toolbelt==1.0.0 # rfc3986==2.0.0 # rich==13.9.4 +# safetensors==0.4.5 # sympy==1.13.1 +# tokenizers==0.20.3 # typing_extensions==4.12.2 # urllib3==2.2.3 -# zipp==3.21.0 +# zipp==3.21.0 \ No newline at end of file