feat: make vllm optional
saghul committed Feb 7, 2025
1 parent e623073 commit 72b1d0e
Showing 9 changed files with 226 additions and 97 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
@@ -17,7 +17,7 @@ RUN \
apt-dpkg-wrap apt-get install -y build-essential libcurl4-openssl-dev python3.11 python3.11-venv && \
apt-cleanup

COPY requirements.txt /app/
COPY requirements-vllm.txt /app/

WORKDIR /app

@@ -26,7 +26,7 @@ ENV PIP_DISABLE_PIP_VERSION_CHECK=on
RUN \
python3.11 -m venv .venv && \
. .venv/bin/activate && \
pip install -vvv -r requirements.txt
pip install -vvv -r requirements-vllm.txt

## Build ffmpeg

6 changes: 3 additions & 3 deletions README.md
@@ -4,7 +4,7 @@ Skynet is an API server for AI services wrapping several apps and models.

It is comprised of specialized modules which can be enabled or disabled as needed.

- **Summary and Action Items** with vllm (or llama.cpp)
- **Summary and Action Items** with vllm (or Ollama)
- **Live Transcriptions** with Faster Whisper via websockets
- 🚧 _More to follow_

@@ -16,7 +16,7 @@ It is comprised of specialized modules which can be enabled or disabled as neede
## Summaries Quickstart

```bash
# if VLLM cannot be used, make sure to have Ollama started. In that case LLAMA_PATH should be the model name, like "llama3.1".
# If VLLM cannot be used, make sure to have Ollama started. In that case LLAMA_PATH should be the model name, like "llama3.1".
export LLAMA_PATH="$HOME/models/Llama-3.1-8B-Instruct"

# disable authorization (for testing)
@@ -25,7 +25,7 @@ export BYPASS_AUTHORIZATION=1
# start Redis
docker run -d --rm -p 6379:6379 redis

poetry install
poetry install --with vllm
./run.sh

# open http://localhost:8000/summaries/docs in a browser
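The README quickstart above covers the vllm path. For the Ollama fallback it mentions, a minimal sequence might look like the sketch below; the `ollama pull` step and the exact model name are illustrative assumptions, not part of this commit.

```bash
# Fallback quickstart without the vllm group (e.g. no CUDA GPU); commands are illustrative
ollama pull llama3.1                   # assumes Ollama is installed and running
export LLAMA_PATH="llama3.1"           # model name instead of a filesystem path
export BYPASS_AUTHORIZATION=1          # disable authorization (for testing)

docker run -d --rm -p 6379:6379 redis  # start Redis

poetry install                         # skips the optional vllm group
./run.sh
```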
2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default.

7 changes: 6 additions & 1 deletion pyproject.toml
@@ -19,6 +19,12 @@ pytest = "7.4.4"
pytest-asyncio = "0.23.3"
pytest-mock = "3.12.0"

[tool.poetry.group.vllm]
optional = true

[tool.poetry.group.vllm.dependencies]
vllm = "0.7.0"

[tool.poetry.dependencies]
aiofiles = "24.1.0"
aiohttp = "3.11.9"
@@ -42,7 +48,6 @@ torch = "2.5.1"
torchaudio = "2.5.1"
uuid6 = "2024.07.10"
uvicorn = "0.32.1"
vllm = "0.7.0"
oci = "^2.144.0"
langchain-community = "0.3.8"

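With vllm now declared as an optional Poetry group, the two install variants look like this; a small sketch using standard Poetry group syntax (only `poetry install --with vllm` appears in this commit, the rest is assumed usage).

```bash
# CPU-only or Ollama-backed setups: skip the heavy vllm dependency
poetry install

# CUDA setups that should serve the model through vllm
poetry install --with vllm
```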
193 changes: 193 additions & 0 deletions requirements-vllm.txt

Large diffs are not rendered by default.

85 changes: 3 additions & 82 deletions requirements.txt

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion skynet/env.py
@@ -9,7 +9,12 @@
is_mac = sys.platform == 'darwin'

device = 'cuda' if torch.cuda.is_available() else 'cpu'
use_vllm = device == 'cuda'
try:
import vllm
except ImportError:
use_vllm = False
else:
use_vllm = device == 'cuda'


# utilities
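Because `use_vllm` now requires both a successful import and a CUDA device, a quick way to see which backend a machine will pick is something along these lines (an illustrative check, not part of the commit):

```bash
# True only when the vllm package is importable inside the project virtualenv
poetry run python -c "import importlib.util; print(importlib.util.find_spec('vllm') is not None)"

# True only when torch can see a CUDA device
poetry run python -c "import torch; print(torch.cuda.is_available())"
```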
15 changes: 8 additions & 7 deletions skynet/modules/ttt/openai_api/app.py
@@ -6,8 +6,6 @@
from fastapi import HTTPException, Request
from fastapi.responses import JSONResponse, StreamingResponse

from vllm.entrypoints.openai.api_server import router as vllm_router

from skynet import http_client
from skynet.auth.bearer import JWTBearer
from skynet.env import bypass_auth, llama_n_ctx, llama_path, openai_api_base_url, use_oci, use_vllm, vllm_server_port
@@ -16,11 +14,19 @@

log = get_logger(__name__)

app = create_app()
whitelisted_routes = []


def initialize():
if not use_vllm:
return

from vllm.entrypoints.openai.api_server import router as vllm_router

app.include_router(vllm_router, dependencies=dependencies, responses=responses)
whitelisted_routes.extend(['/openai/docs', '/openai/openapi.json'])

log.info('Starting OpenAI API server...')

proc = subprocess.Popen(
@@ -56,11 +62,6 @@ async def is_ready():
return False


app = create_app()
app.include_router(vllm_router, dependencies=dependencies, responses=responses)

whitelisted_routes = ['/openai/docs', '/openai/openapi.json']

bearer = JWTBearer()


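A side effect of registering the vllm router lazily is that the OpenAI-compatible docs are only mounted when vllm is active. Under the assumption that the server runs on port 8000 as in the README quickstart, a check could look like this (URL and expected status are assumptions, not verified against the commit):

```bash
# Typically 200 when the vllm router was mounted, 404 when running without vllm
curl -s -o /dev/null -w '%{http_code}\n' http://localhost:8000/openai/docs
```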
4 changes: 4 additions & 0 deletions sync-requirements.sh
@@ -2,3 +2,7 @@

poetry export --without-hashes --format=requirements.txt > requirements.txt
git add requirements.txt

poetry export --with vllm --without-hashes --format=requirements.txt > requirements-vllm.txt
git add requirements-vllm.txt
